PyPI - sdgym - Versions diffs - 0.12.2.dev0__tar.gz → 0.13.1.dev0__tar.gz - Mend

sdgym 0.12.2.dev0__tar.gz → 0.13.1.dev0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (51)

{sdgym-0.12.2.dev0/sdgym.egg-info → sdgym-0.13.1.dev0}/PKG-INFO RENAMED Viewed

@@ -1,9 +1,9 @@
 Metadata-Version: 2.4
 Name: sdgym
-Version: 0.12.2.dev0
+Version: 0.13.1.dev0
 Summary: Benchmark tabular synthetic data generators using a variety of datasets
 Author-email: "DataCebo, Inc." <info@sdv.dev>
-License: BSL-1.1
+License-Expression: BUSL-1.1
 Project-URL: Source Code, https://github.com/sdv-dev/SDGym/
 Project-URL: Issue Tracker, https://github.com/sdv-dev/SDGym/issues
 Project-URL: Changes, https://github.com/sdv-dev/SDGym/blob/main/HISTORY.md
@@ -12,7 +12,6 @@ Project-URL: Chat, https://bit.ly/sdv-slack-invite
 Keywords: machine learning,synthetic data generation,benchmark,generative models
 Classifier: Development Status :: 2 - Pre-Alpha
 Classifier: Intended Audience :: Developers
-Classifier: License :: Free for non-commercial use
 Classifier: Natural Language :: English
 Classifier: Programming Language :: Python :: 3
 Classifier: Programming Language :: Python :: 3.9
@@ -20,44 +19,57 @@ Classifier: Programming Language :: Python :: 3.10
 Classifier: Programming Language :: Python :: 3.11
 Classifier: Programming Language :: Python :: 3.12
 Classifier: Programming Language :: Python :: 3.13
+Classifier: Programming Language :: Python :: 3.14
 Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
-Requires-Python: <3.14,>=3.9
+Requires-Python: <3.15,>=3.9
 Description-Content-Type: text/markdown
 License-File: LICENSE
 Requires-Dist: appdirs>=1.3
 Requires-Dist: boto3<2,>=1.28
 Requires-Dist: botocore<2,>=1.31
-Requires-Dist: cloudpickle>=2.1.0
+Requires-Dist: cloudpickle>=2.1.0; python_version < "3.14"
+Requires-Dist: cloudpickle>=3.1.1; python_version >= "3.14"
 Requires-Dist: compress-pickle>=1.2.0
-Requires-Dist: google-cloud-compute>=1.0.0
-Requires-Dist: google-auth>=2.0.0
+Requires-Dist: google-cloud-compute>=1.30.0
+Requires-Dist: google-auth>=2.14.1
 Requires-Dist: humanfriendly>=10.0
 Requires-Dist: numpy>=1.22.2; python_version < "3.10"
 Requires-Dist: numpy>=1.24.0; python_version >= "3.10" and python_version < "3.12"
 Requires-Dist: numpy>=1.26.0; python_version >= "3.12" and python_version < "3.13"
-Requires-Dist: numpy>=2.1.0; python_version >= "3.13"
+Requires-Dist: numpy>=2.1.0; python_version >= "3.13" and python_version < "3.14"
+Requires-Dist: numpy>=2.3.2; python_version >= "3.14"
 Requires-Dist: openpyxl>=3.1.2
-Requires-Dist: pandas<3.0.0,>=1.4.0; python_version < "3.11"
-Requires-Dist: pandas<3.0.0,>=1.5.0; python_version >= "3.11" and python_version < "3.12"
-Requires-Dist: pandas<3.0.0,>=2.1.1; python_version >= "3.12" and python_version < "3.13"
-Requires-Dist: pandas<3.0.0,>=2.2.3; python_version >= "3.13"
-Requires-Dist: psutil>=5.7
+Requires-Dist: pandas<3,>=1.4.0; python_version < "3.11"
+Requires-Dist: pandas<3,>=1.5.0; python_version >= "3.11" and python_version < "3.12"
+Requires-Dist: pandas<3,>=2.1.1; python_version >= "3.12" and python_version < "3.13"
+Requires-Dist: pandas<3,>=2.2.3; python_version >= "3.13" and python_version < "3.14"
+Requires-Dist: pandas<3,>=2.3.3; python_version >= "3.14"
+Requires-Dist: psutil>=5.8
 Requires-Dist: scikit-learn>=1.0.2; python_version < "3.10"
 Requires-Dist: scikit-learn>=1.1.0; python_version >= "3.10" and python_version < "3.11"
 Requires-Dist: scikit-learn>=1.1.3; python_version >= "3.11" and python_version < "3.12"
 Requires-Dist: scikit-learn>=1.3.1; python_version >= "3.12" and python_version < "3.13"
-Requires-Dist: scikit-learn>=1.5.2; python_version >= "3.13"
+Requires-Dist: scikit-learn>=1.5.2; python_version >= "3.13" and python_version < "3.14"
+Requires-Dist: scikit-learn>=1.8.0; python_version >= "3.14"
 Requires-Dist: scipy>=1.7.3; python_version < "3.10"
 Requires-Dist: scipy>=1.9.2; python_version >= "3.10" and python_version < "3.12"
 Requires-Dist: scipy>=1.12.0; python_version >= "3.12" and python_version < "3.13"
-Requires-Dist: scipy>=1.14.1; python_version >= "3.13"
+Requires-Dist: scipy>=1.14.1; python_version >= "3.13" and python_version < "3.14"
+Requires-Dist: scipy>=1.16.1; python_version >= "3.14"
 Requires-Dist: tabulate<0.9,>=0.8.3
-Requires-Dist: torch>=2.6.0
+Requires-Dist: torch>=1.13.0; python_version < "3.11"
+Requires-Dist: torch>=2.0.0; python_version >= "3.11" and python_version < "3.12"
+Requires-Dist: torch>=2.3.0; python_version >= "3.12" and python_version < "3.13"
+Requires-Dist: torch>=2.6.0; python_version >= "3.13" and python_version < "3.14"
+Requires-Dist: torch>=2.9.0; python_version >= "3.14"
 Requires-Dist: tqdm>=4.66.3
 Requires-Dist: XlsxWriter>=1.2.8
-Requires-Dist: rdt>=1.17.0
-Requires-Dist: sdmetrics>=0.20.1
-Requires-Dist: sdv>=1.21.0
+Requires-Dist: rdt>=1.18.2; python_version < "3.14"
+Requires-Dist: rdt>=1.20.0; python_version >= "3.14"
+Requires-Dist: sdmetrics>=0.21.0; python_version < "3.14"
+Requires-Dist: sdmetrics>=0.26.0; python_version >= "3.14"
+Requires-Dist: sdv>=1.21.0; python_version < "3.14"
+Requires-Dist: sdv>=1.33.0; python_version >= "3.14"
 Provides-Extra: dask
 Requires-Dist: dask; extra == "dask"
 Requires-Dist: distributed; extra == "dask"

{sdgym-0.12.2.dev0 → sdgym-0.13.1.dev0}/pyproject.toml RENAMED Viewed

@@ -5,7 +5,6 @@ authors = [{ name = 'DataCebo, Inc.', email = 'info@sdv.dev' }]
 classifiers = [
     'Development Status :: 2 - Pre-Alpha',
     'Intended Audience :: Developers',
-    'License :: Free for non-commercial use',
     'Natural Language :: English',
     'Programming Language :: Python :: 3',
     'Programming Language :: Python :: 3.9',
@@ -13,48 +12,62 @@ classifiers = [
     'Programming Language :: Python :: 3.11',
     'Programming Language :: Python :: 3.12',
     'Programming Language :: Python :: 3.13',
+    'Programming Language :: Python :: 3.14',
     'Topic :: Scientific/Engineering :: Artificial Intelligence',
 ]
 keywords = ['machine learning', 'synthetic data generation', 'benchmark', 'generative models']
 dynamic = ['version']
-license = { text = 'BSL-1.1' }
-requires-python = '>=3.9,<3.14'
+license = 'BUSL-1.1'
+license-files = ['LICENSE']
+requires-python = '>=3.9,<3.15'
 readme = 'README.md'
 dependencies = [
     'appdirs>=1.3',
     'boto3>=1.28,<2',
     'botocore>=1.31,<2',
-    'cloudpickle>=2.1.0',
+    "cloudpickle>=2.1.0;python_version<'3.14'",
+    "cloudpickle>=3.1.1;python_version>='3.14'",
     'compress-pickle>=1.2.0',
-    'google-cloud-compute>=1.0.0',
-    'google-auth>=2.0.0',
+    'google-cloud-compute>=1.30.0',
+    'google-auth>=2.14.1',
     'humanfriendly>=10.0',
     "numpy>=1.22.2;python_version<'3.10'",
     "numpy>=1.24.0;python_version>='3.10' and python_version<'3.12'",
     "numpy>=1.26.0;python_version>='3.12' and python_version<'3.13'",
-    "numpy>=2.1.0;python_version>='3.13'",
+    "numpy>=2.1.0;python_version>='3.13' and python_version<'3.14'",
+    "numpy>=2.3.2;python_version>='3.14'",
     'openpyxl>=3.1.2',
-    "pandas>=1.4.0,<3.0.0;python_version<'3.11'",
-    "pandas>=1.5.0,<3.0.0;python_version>='3.11' and python_version<'3.12'",
-    "pandas>=2.1.1,<3.0.0;python_version>='3.12' and python_version<'3.13'",
-    "pandas>=2.2.3,<3.0.0;python_version>='3.13'",
-    'psutil>=5.7',
+    "pandas>=1.4.0,<3;python_version<'3.11'",
+    "pandas>=1.5.0,<3;python_version>='3.11' and python_version<'3.12'",
+    "pandas>=2.1.1,<3;python_version>='3.12' and python_version<'3.13'",
+    "pandas>=2.2.3,<3;python_version>='3.13' and python_version<'3.14'",
+    "pandas>=2.3.3,<3;python_version>='3.14'",
+    'psutil>=5.8',
     "scikit-learn>=1.0.2;python_version<'3.10'",
     "scikit-learn>=1.1.0;python_version>='3.10' and python_version<'3.11'",
     "scikit-learn>=1.1.3;python_version>='3.11' and python_version<'3.12'",
     "scikit-learn>=1.3.1;python_version>='3.12' and python_version<'3.13'",
-    "scikit-learn>=1.5.2;python_version>='3.13'",
+    "scikit-learn>=1.5.2;python_version>='3.13' and python_version<'3.14'",
+    "scikit-learn>=1.8.0;python_version>='3.14'",
     "scipy>=1.7.3;python_version<'3.10'",
     "scipy>=1.9.2;python_version>='3.10' and python_version<'3.12'",
     "scipy>=1.12.0;python_version>='3.12' and python_version<'3.13'",
-    "scipy>=1.14.1;python_version>='3.13'",
+    "scipy>=1.14.1;python_version>='3.13' and python_version<'3.14'",
+    "scipy>=1.16.1;python_version>='3.14'",
     'tabulate>=0.8.3,<0.9',
-    "torch>=2.6.0",
+    "torch>=1.13.0;python_version<'3.11'",
+    "torch>=2.0.0;python_version>='3.11' and python_version<'3.12'",
+    "torch>=2.3.0;python_version>='3.12' and python_version<'3.13'",
+    "torch>=2.6.0;python_version>='3.13' and python_version<'3.14'",
+    "torch>=2.9.0;python_version>='3.14'",
     'tqdm>=4.66.3',
     'XlsxWriter>=1.2.8',
-    'rdt>=1.17.0',
-    'sdmetrics>=0.20.1',
-    'sdv>=1.21.0',
+    "rdt>=1.18.2;python_version<'3.14'",
+    "rdt>=1.20.0;python_version>='3.14'",
+    "sdmetrics>=0.21.0;python_version<'3.14'",
+    "sdmetrics>=0.26.0;python_version>='3.14'",
+    "sdv>=1.21.0;python_version<'3.14'",
+    "sdv>=1.33.0;python_version>='3.14'",
 ]
 [project.urls]
@@ -113,7 +126,6 @@ all = [
 [tool.setuptools]
 include-package-data = true
-license-files = ['LICENSE']
 [tool.setuptools.packages.find]
 include = ['sdgym', 'sdgym.*']
@@ -149,7 +161,7 @@ namespaces = false
 version = {attr = 'sdgym.__version__'}
 [tool.bumpversion]
-current_version = "0.12.2.dev0"
+current_version = "0.13.1.dev0"
 parse = '(?P<major>\d+)\.(?P<minor>\d+)\.(?P<patch>\d+)(\.(?P<release>[a-z]+)(?P<candidate>\d+))?'
 serialize = [
     '{major}.{minor}.{patch}.{release}{candidate}',

{sdgym-0.12.2.dev0 → sdgym-0.13.1.dev0}/sdgym/__init__.py RENAMED Viewed

@@ -8,7 +8,7 @@ __author__ = 'DataCebo, Inc.'
 __copyright__ = 'Copyright (c) 2022 DataCebo, Inc.'
 __email__ = 'info@sdv.dev'
 __license__ = 'BSL-1.1'
-__version__ = '0.12.2.dev0'
+__version__ = '0.13.1.dev0'
 import logging

{sdgym-0.12.2.dev0 → sdgym-0.13.1.dev0}/sdgym/_benchmark/credentials_utils.py RENAMED Viewed

@@ -74,7 +74,7 @@ def sdv_install_cmd(credentials):
 pip install sdv-installer
 python -c "from sdv_installer.installation.installer import install_packages; \\
-install_packages(username='{username}', license_key='{license_key}', package='sdv-enterprise')"
+install_packages(username='{username}', license_key='{license_key}')"
 """)

{sdgym-0.12.2.dev0 → sdgym-0.13.1.dev0}/sdgym/benchmark.py RENAMED Viewed

@@ -514,7 +514,7 @@ def _compute_scores(
         for metric_name, metric in metrics.items():
             scores.append({
                 'metric': metric_name,
-                'error': 'Metric Timeout',
+                'Error': 'Metric Timeout',
             })
             # re-inject list to multiprocessing output
             output['scores'] = scores
@@ -537,7 +537,7 @@ def _compute_scores(
             scores[-1].update({
                 'score': score,
                 'normalized_score': normalized_score,
-                'error': error,
+                'Error': error,
                 'metric_time': calculate_score_time(start),
             })
             # re-inject list to multiprocessing output
@@ -603,7 +603,7 @@ def _score(
         output = {}
     output['timeout'] = True  # To be deleted if there is no error
-    output['error'] = 'Load Timeout'  # To be deleted if there is no error
+    output['Error'] = 'Load Timeout'  # To be deleted if there is no error
     try:
         LOGGER.info(
             'Running %s on %s dataset %s; %s',
@@ -615,7 +615,7 @@ def _score(
         output['dataset_size'] = get_size_of(data) / N_BYTES_IN_MB
         # To be deleted if there is no error
-        output['error'] = 'Synthesizer Timeout'
+        output['Error'] = 'Synthesizer Timeout'
         try:
             synthetic_data, train_time, sample_time, synthesizer_size, peak_memory = _synthesize(
@@ -642,7 +642,7 @@ def _score(
             )
             # No error so far. _compute_scores tracks its own errors by metric
-            del output['error']
+            del output['Error']
             _compute_scores(
                 metrics,
                 data,
@@ -671,14 +671,14 @@ def _score(
             output['peak_memory'] = err.peak_memory
             output['exception'] = err.exception
-            output['error'] = err.error
+            output['Error'] = err.error
             output['timeout'] = False
     except Exception:
         LOGGER.exception('Error running %s on dataset %s;', synthesizer['name'], dataset_name)
         exception, error = format_exception()
         output['exception'] = exception
-        output['error'] = error
+        output['Error'] = error
         output['timeout'] = False  # There was no timeout
     finally:
@@ -744,7 +744,7 @@ def _score_with_timeout(
         thread.join(timeout)
         if thread.is_alive():
             LOGGER.error('Timeout running %s on dataset %s;', synthesizer['name'], dataset_name)
-            return {'timeout': True, 'error': 'Synthesizer Timeout'}
+            return {'timeout': True, 'Error': 'Synthesizer Timeout'}
         return output
@@ -815,8 +815,8 @@ def _format_output(
     for score in output.get('scores', []):
         scores.insert(len(scores.columns), score['metric'], score['normalized_score'])
-    if 'error' in output:
-        scores['error'] = output['error']
+    if 'Error' in output:
+        scores['Error'] = output['Error']
     return scores
@@ -1085,8 +1085,8 @@ def _add_adjusted_scores(scores, timeout):
         fit_times = scores.loc[dataset_mask, 'Train_Time'].fillna(0)
         sample_times = scores.loc[dataset_mask, 'Sample_Time'].fillna(0)
-        if 'error' in scores.columns:
-            errors = scores.loc[dataset_mask, 'error']
+        if 'Error' in scores.columns:
+            errors = scores.loc[dataset_mask, 'Error']
         else:
             errors = pd.Series([None] * dataset_mask.sum(), index=scores.index[dataset_mask])

{sdgym-0.12.2.dev0 → sdgym-0.13.1.dev0}/sdgym/cli/__main__.py RENAMED Viewed

@@ -39,13 +39,13 @@ def _print_table(data, sort=None, reverse=False, format=None):
         for field, formatter in format.items():
             data[field] = data[field].apply(formatter)
-    if 'error' in data:
-        error = data['error']
+    if 'Error' in data:
+        error = data['Error']
         if pd.isna(error).all():
-            del data['error']
+            del data['Error']
         else:
             long_error = error.str.len() > 30
-            data.loc[long_error, 'error'] = error[long_error].str[:30] + '...'
+            data.loc[long_error, 'Error'] = error[long_error].str[:30] + '...'
     print(tabulate.tabulate(data, tablefmt='github', headers=data.columns, showindex=False))  # noqa: T201

{sdgym-0.12.2.dev0 → sdgym-0.13.1.dev0}/sdgym/cli/summary.py RENAMED Viewed

@@ -35,11 +35,11 @@ def preprocess(data):
     bydataset = grouped.mean()
     data = bydataset.reset_index()
-    if 'error' in data.columns:
+    if 'Error' in data.columns:
         errors = data.error.fillna('')
         for message, column in KNOWN_ERRORS:
             data[column] = errors.str.contains(message)
-            data.loc[data[column], 'error'] = np.nan
+            data.loc[data[column], 'Error'] = np.nan
     return data
@@ -122,7 +122,7 @@ def summarize(data, baselines=(), datasets=None):
         baseline_scores = baseline_data.set_index('Dataset').Quality_Score
         results[f'beat_{baseline.lower()}'] = _beat_baseline(data, baseline_scores)
-    if 'error' in data.columns:
+    if 'Error' in data.columns:
         grouped = data.groupby('Synthesizer')
         for _, error_column in KNOWN_ERRORS:
             results[error_column] = grouped[error_column].sum()
@@ -135,7 +135,7 @@ def summarize(data, baselines=(), datasets=None):
 def _error_counts(data):
-    if 'error' in data.columns:
+    if 'Error' in data.columns:
         return data.error.value_counts()
     return 0
@@ -158,8 +158,8 @@ def errors_summary(data):
     Returns:
         pandas.DataFrame
     """
-    if 'error' in data.columns:
-        all_errors = pd.DataFrame(_error_counts(data)).rename(columns={'error': 'all'})
+    if 'Error' in data.columns:
+        all_errors = pd.DataFrame(_error_counts(data)).rename(columns={'Error': 'all'})
         synthesizer_errors = data.groupby('Synthesizer').apply(_error_counts).pivot_table(level=0)
         for synthesizer, errors in synthesizer_errors.items():
             all_errors[synthesizer] = errors.fillna(0).astype(int)

{sdgym-0.12.2.dev0 → sdgym-0.13.1.dev0}/sdgym/result_writer.py RENAMED Viewed

@@ -12,6 +12,7 @@ import yaml
 from openpyxl import load_workbook
 from sdgym.s3 import parse_s3_path
+from sdgym.utils import _set_column_width
 class ResultsWriter(ABC):
@@ -79,6 +80,7 @@ class LocalResultsWriter:
         with writer:
             for sheet_name, df in data.items():
                 df.to_excel(writer, sheet_name=sheet_name, index=index)
+                _set_column_width(writer, df, sheet_name)
         wb = load_workbook(file_path)
         for sheet_name in reversed(data.keys()):

sdgym-0.13.1.dev0/sdgym/run_benchmark/run_benchmark.py ADDED Viewed

@@ -0,0 +1,206 @@
+"""Script to run a benchmark and upload results to S3."""
+import json
+import os
+from datetime import datetime, timezone
+from botocore.exceptions import ClientError
+from sdgym._benchmark.benchmark import (
+    _benchmark_multi_table_compute_gcp,
+    _benchmark_single_table_compute_gcp,
+)
+from sdgym.run_benchmark.utils import (
+    KEY_DATE_FILE,
+    OUTPUT_DESTINATION_AWS,
+    _exclude_datasets,
+    _parse_args,
+    get_result_folder_name,
+    post_benchmark_launch_message,
+)
+from sdgym.s3 import get_s3_client, parse_s3_path
+SINGLE_TABLE_DATASETS = [
+    'adult',
+    'alarm',
+    'census',
+    'child',
+    'covtype',
+    'expedia_hotel_logs',
+    'insurance',
+    'intrusion',
+    'news',
+]
+MULTI_TABLE_DATASETS = [
+    'WebKP',
+    'DCG',
+    'UW_std',
+    'Same_gen',
+    'CORA',
+    'got_families',
+    'SalesDB',
+    'UTube',
+    'Student_loan',
+    'Hepatitis_std',
+    'Elti',
+    'Bupa',
+    'Toxicology',
+    'imdb_ijs',
+    'ftp',
+    'imdb_small',
+    'imdb_MovieLens',
+    'Pima',
+    'university',
+    'legalActs',
+    'Dunur',
+    'Mesh',
+    'world',
+    'airbnb-simplified',
+    'trains',
+    'FNHK',
+    'fake_hotels',
+    'SAT',
+    'genes',
+    'Biodegradability',
+    'Pyrimidine',
+    'mutagenesis',
+    'restbase',
+    'Triazine',
+    'Carcinogenesis',
+    'fake_hotels_extended',
+    'Mooney_Family',
+    'PTE',
+    'Facebook',
+    'multi_table_ID_demo_dataset',
+    'SAP',
+    'Chess',
+    'Countries',
+    'NCAA',
+    'Atherosclerosis',
+    'nations',
+    'TubePricing',
+    'financial',
+    'Accidents',
+    'MuskSmall',
+    'NBA',
+    'AustralianFootball',
+    'PremierLeague',
+    'OMOP_CDM_dayz',
+]
+def _get_benchmark_setup(modality):
+    """Get the benchmark setup for a given modality.
+    The setup includes the method to run the benchmark and the job split,
+    which is a list of tuples where each tuple contains a list of synthesizers and
+    a list of datasets to run those synthesizers on.
+    """
+    if modality == 'single_table':
+        real_tab_former_to_exclude = ['covtype', 'intrusion', 'expedia_hotel_logs', 'census']
+        gan_to_exclude = ['covtype', 'intrusion']
+        job_split = [
+            (['ColumnSynthesizer', 'GaussianCopulaSynthesizer'], SINGLE_TABLE_DATASETS),
+            (['TVAESynthesizer'], SINGLE_TABLE_DATASETS),
+            (['SegmentSynthesizer'], SINGLE_TABLE_DATASETS),
+            (['XGCSynthesizer'], SINGLE_TABLE_DATASETS),
+            (['BootstrapSynthesizer'], SINGLE_TABLE_DATASETS),
+            (['CTGANSynthesizer'], _exclude_datasets(SINGLE_TABLE_DATASETS, gan_to_exclude)),
+            (['CopulaGANSynthesizer'], _exclude_datasets(SINGLE_TABLE_DATASETS, gan_to_exclude)),
+            (
+                ['RealTabFormerSynthesizer'],
+                _exclude_datasets(SINGLE_TABLE_DATASETS, real_tab_former_to_exclude),
+            ),
+        ]
+        for dataset in real_tab_former_to_exclude:
+            job_split.append((['RealTabFormerSynthesizer'], [dataset]))
+        for dataset in gan_to_exclude:
+            job_split.append((['CTGANSynthesizer'], [dataset]))
+            job_split.append((['CopulaGANSynthesizer'], [dataset]))
+        return {
+            'method': _benchmark_single_table_compute_gcp,
+            'job_split': job_split,
+        }
+    if modality == 'multi_table':
+        hma_to_exclude = [
+            'Accidents',
+            'AustralianFootball',
+            'Countries',
+            'MuskSmall',
+            'NBA',
+            'OMOP_CDM_dayz',
+            'PremierLeague',
+            'SalesDB',
+            'airbnb-simplified',
+            'imdb_ijs',
+            'legalActs',
+            'SAP',
+            'imdb_MovieLens',
+        ]
+        job_split = [
+            (['HSASynthesizer', 'IndependentSynthesizer'], MULTI_TABLE_DATASETS),
+            (['HMASynthesizer'], _exclude_datasets(MULTI_TABLE_DATASETS, hma_to_exclude)),
+        ]
+        for dataset in hma_to_exclude:
+            job_split.append((['HMASynthesizer'], [dataset]))
+        return {
+            'method': _benchmark_multi_table_compute_gcp,
+            'job_split': job_split,
+        }
+def append_benchmark_run(
+    aws_access_key_id, aws_secret_access_key, date_str, modality='single_table'
+):
+    """Append a new benchmark run to the benchmark dates file in S3."""
+    s3_client = get_s3_client(
+        aws_access_key_id=aws_access_key_id,
+        aws_secret_access_key=aws_secret_access_key,
+    )
+    bucket, prefix = parse_s3_path(OUTPUT_DESTINATION_AWS)
+    try:
+        object = s3_client.get_object(Bucket=bucket, Key=f'{prefix}{modality}/{KEY_DATE_FILE}')
+        body = object['Body'].read().decode('utf-8')
+        data = json.loads(body)
+    except ClientError as e:
+        if e.response['Error']['Code'] == 'NoSuchKey':
+            data = {'runs': []}
+        else:
+            raise RuntimeError(f'Failed to read {KEY_DATE_FILE} from S3: {e}')
+    data['runs'].append({'date': date_str, 'folder_name': get_result_folder_name(date_str)})
+    data['runs'] = sorted(data['runs'], key=lambda x: x['date'])
+    s3_client.put_object(
+        Bucket=bucket,
+        Key=f'{prefix}{modality}/{KEY_DATE_FILE}',
+        Body=json.dumps(data).encode('utf-8'),
+    )
+def main():
+    """Main function to run the benchmark and upload results."""
+    args = _parse_args()
+    aws_access_key_id = os.getenv('AWS_ACCESS_KEY_ID')
+    aws_secret_access_key = os.getenv('AWS_SECRET_ACCESS_KEY')
+    date_str = datetime.now(timezone.utc).strftime('%Y-%m-%d')
+    modality = args.modality
+    benchmark_setup = _get_benchmark_setup(modality)
+    for synthesizers, datasets in benchmark_setup['job_split']:
+        benchmark_setup['method'](
+            output_destination=OUTPUT_DESTINATION_AWS,
+            credential_filepath=os.getenv('CREDENTIALS_FILEPATH'),
+            synthesizers=synthesizers,
+            sdv_datasets=datasets,
+            timeout=345600,  # 4 days
+        )
+    append_benchmark_run(aws_access_key_id, aws_secret_access_key, date_str, modality=modality)
+    post_benchmark_launch_message(date_str, compute_service='GCP', modality=modality)
+if __name__ == '__main__':
+    main()

{sdgym-0.12.2.dev0 → sdgym-0.13.1.dev0}/sdgym/run_benchmark/upload_benchmark_results.py RENAMED Viewed

@@ -29,6 +29,7 @@ from sdgym.run_benchmark.utils import (
     get_df_to_plot,
 )
 from sdgym.s3 import S3_REGION, parse_s3_path
+from sdgym.utils import _set_column_width
 LOGGER = logging.getLogger(__name__)
 SYNTHESIZER_TO_GLOBAL_POSITION = {
@@ -231,7 +232,6 @@ def get_model_details(summary, results, df_to_plot, modality):
     with open(SYNTHESIZER_DESCRIPTION_PATH, 'r', encoding='utf-8') as f:
         synthesizer_info = yaml.safe_load(f) or {}
-    err_column = 'error' if 'error' in results.columns else 'Error'
     paretos_synthesizers = (
         df_to_plot.loc[df_to_plot['Pareto'].eq(True), 'Synthesizer'].astype(str).add('Synthesizer')
     )
@@ -258,18 +258,23 @@ def get_model_details(summary, results, df_to_plot, modality):
     model_details['Number of datasets - Wins'] = (
         model_details['Synthesizer'].map(wins).fillna(0).astype(int)
     )
-    timeout_counts = (
-        results
-        .loc[results[err_column].eq('Synthesizer Timeout')]
-        .groupby('Synthesizer')['Dataset']
-        .nunique()
-    )
-    error_counts = (
-        results
-        .loc[results[err_column].notna() & ~results[err_column].eq('Synthesizer Timeout')]
-        .groupby('Synthesizer')['Dataset']
-        .nunique()
-    )
+    if 'Error' in results.columns:
+        timeout_counts = (
+            results
+            .loc[results['Error'].eq('Synthesizer Timeout')]
+            .groupby('Synthesizer')['Dataset']
+            .nunique()
+        )
+        error_counts = (
+            results
+            .loc[results['Error'].notna() & ~results['Error'].eq('Synthesizer Timeout')]
+            .groupby('Synthesizer')['Dataset']
+            .nunique()
+        )
+    else:
+        timeout_counts = pd.Series(0, index=model_details['Synthesizer'])
+        error_counts = pd.Series(0, index=model_details['Synthesizer'])
     model_details['Number of datasets - Timeout'] = (
         model_details['Synthesizer'].map(timeout_counts).fillna(0).astype(int)
     )
@@ -313,7 +318,8 @@ def update_table_aws(s3_client, bucket, filename, table, reference_column):
     updated_table = pd.concat([existing_table, table], ignore_index=True)
     output = io.BytesIO()
     with pd.ExcelWriter(output, engine='openpyxl') as writer:
-        updated_table.to_excel(writer, index=False)
+        updated_table.to_excel(writer, index=False, sheet_name='Sheet1')
+        _set_column_width(writer, updated_table, 'Sheet1')
     output.seek(0)
     s3_client.upload_fileobj(output, bucket, filename)

{sdgym-0.12.2.dev0 → sdgym-0.13.1.dev0}/sdgym/run_benchmark/utils.py RENAMED Viewed

@@ -6,13 +6,15 @@ from datetime import datetime
 from urllib.parse import parse_qs, quote_plus, urlparse
 import numpy as np
+import pandas as pd
+from scipy.interpolate import interp1d
 from slack_sdk import WebClient
 from sdgym.s3 import parse_s3_path
 OUTPUT_DESTINATION_AWS = 's3://sdgym-benchmark/Benchmarks/'
 DEBUG_SLACK_CHANNEL = 'sdv-alerts-debug'
-SLACK_CHANNEL = 'sdv-alerts'
+SLACK_CHANNEL = 'sdgym'
 KEY_DATE_FILE = '_BENCHMARK_DATES.json'
 PLOTLY_MARKERS = [
     'circle',
@@ -45,18 +47,7 @@ PLOTLY_MARKERS = [
     'diamond-cross',
     'diamond-x',
 ]
-# The synthesizers inside the same list will be run by the same ec2 instance
-SYNTHESIZERS_SPLIT_SINGLE_TABLE = [
-    ['UniformSynthesizer', 'ColumnSynthesizer', 'GaussianCopulaSynthesizer', 'TVAESynthesizer'],
-    ['CopulaGANSynthesizer'],
-    ['CTGANSynthesizer'],
-    ['RealTabFormerSynthesizer'],
-]
-SYNTHESIZERS_SPLIT_MULTI_TABLE = [
-    ['HMASynthesizer'],
-    ['HSASynthesizer', 'IndependentSynthesizer', 'MultiTableUniformSynthesizer'],
-]
+PLOT_PADDING = 0.25
 def _get_filename_to_gdrive_link():
@@ -104,7 +95,7 @@ def post_slack_message(channel, text):
 def post_benchmark_launch_message(date_str, compute_service='AWS', modality='single_table'):
-    """Post a message to the SDV Alerts Slack channel when the benchmark is launched."""
+    """Post a message to the sdgym Slack channel when the benchmark is launched."""
     channel = SLACK_CHANNEL
     folder_name = get_result_folder_name(date_str)
     bucket, prefix = parse_s3_path(OUTPUT_DESTINATION_AWS)
@@ -116,7 +107,7 @@ def post_benchmark_launch_message(date_str, compute_service='AWS', modality='sin
 def post_benchmark_uploaded_message(folder_name, commit_url=None, modality='single_table'):
-    """Post benchmark uploaded message to sdv-alerts slack channel."""
+    """Post benchmark uploaded message to the sdgym Slack channel."""
     file_to_gdrive_link = _get_filename_to_gdrive_link()
     channel = SLACK_CHANNEL
     bucket, prefix = parse_s3_path(OUTPUT_DESTINATION_AWS)
@@ -136,6 +127,34 @@ def post_benchmark_uploaded_message(folder_name, commit_url=None, modality='sing
     post_slack_message(channel, body)
+def _add_pareto_curve_extremity_points(df_to_plot):
+    """Add extremity points to the Pareto curve for better visualization."""
+    pareto = df_to_plot.loc[df_to_plot['Pareto']].sort_values('Aggregated_Time')
+    if len(pareto) < 2:
+        return df_to_plot.reset_index(drop=True)  # Not enough points to define a curve
+    interp = interp1d(
+        pareto['Log10 Aggregated_Time'],
+        pareto['Quality_Score'],
+        kind='linear',
+        fill_value='extrapolate',
+    )
+    min_log = np.log10(df_to_plot['Aggregated_Time'].min()) - PLOT_PADDING
+    max_log = np.log10(df_to_plot['Aggregated_Time'].max()) + PLOT_PADDING
+    extremities = pd.DataFrame({
+        'Synthesizer': np.nan,
+        'Aggregated_Time': 10 ** np.array([min_log, max_log]),
+        'Quality_Score': interp([min_log, max_log]),
+        'Log10 Aggregated_Time': [min_log, max_log],
+        'Pareto': True,
+        'Color': '#01E0C9',
+        'Marker': np.nan,
+    })
+    return pd.concat([df_to_plot, extremities], ignore_index=True).reset_index(drop=True)
 def get_df_to_plot(benchmark_result):
     """Get the data to plot from the benchmark result.
@@ -177,8 +196,9 @@ def get_df_to_plot(benchmark_result):
     }
     df_to_plot['Marker'] = df_to_plot['Synthesizer'].map(marker_map)
     df_to_plot = df_to_plot.rename(columns={'Adjusted_Quality_Score': 'Quality_Score'})
+    df_to_plot = df_to_plot.drop(columns=['Cumulative Quality Score'])
-    return df_to_plot.drop(columns=['Cumulative Quality Score']).reset_index(drop=True)
+    return _add_pareto_curve_extremity_points(df_to_plot)
 def _parse_args():
@@ -203,3 +223,8 @@ def _extract_google_file_id(google_drive_link):
             return parsed.path.split(marker, 1)[1].split('/', 1)[0]
     raise ValueError(f'Invalid Google Drive link format: {google_drive_link}')
+def _exclude_datasets(datasets, dataset_to_exclude):
+    """Exclude datasets that are in the dataset_to_exclude list."""
+    return [dataset for dataset in datasets if dataset not in dataset_to_exclude]

{sdgym-0.12.2.dev0 → sdgym-0.13.1.dev0}/sdgym/utils.py RENAMED Viewed

@@ -11,6 +11,7 @@ import humanfriendly
 import numpy as np
 import pandas as pd
 import psutil
+from openpyxl.utils import get_column_letter
 from sdgym.errors import SDGymError
 from sdgym.synthesizers.base import BaselineSynthesizer
@@ -195,3 +196,11 @@ def convert_metadata_to_sdmetrics(metadata_dict):
     """Convert a sdv metadata dictionary into sdmetrics expected metadata."""
     table_name = next(iter(metadata_dict['tables']))
     return metadata_dict['tables'][table_name]
+def _set_column_width(writer, df, sheet_name):
+    worksheet = writer.sheets[sheet_name]
+    for col_idx, column in enumerate(df.columns, 1):
+        max_length = max(df[column].astype(str).map(len).max(), len(column))
+        column_letter = get_column_letter(col_idx)
+        worksheet.column_dimensions[column_letter].width = max_length + 2

{sdgym-0.12.2.dev0 → sdgym-0.13.1.dev0/sdgym.egg-info}/PKG-INFO RENAMED Viewed

@@ -1,9 +1,9 @@
 Metadata-Version: 2.4
 Name: sdgym
-Version: 0.12.2.dev0
+Version: 0.13.1.dev0
 Summary: Benchmark tabular synthetic data generators using a variety of datasets
 Author-email: "DataCebo, Inc." <info@sdv.dev>
-License: BSL-1.1
+License-Expression: BUSL-1.1
 Project-URL: Source Code, https://github.com/sdv-dev/SDGym/
 Project-URL: Issue Tracker, https://github.com/sdv-dev/SDGym/issues
 Project-URL: Changes, https://github.com/sdv-dev/SDGym/blob/main/HISTORY.md
@@ -12,7 +12,6 @@ Project-URL: Chat, https://bit.ly/sdv-slack-invite
 Keywords: machine learning,synthetic data generation,benchmark,generative models
 Classifier: Development Status :: 2 - Pre-Alpha
 Classifier: Intended Audience :: Developers
-Classifier: License :: Free for non-commercial use
 Classifier: Natural Language :: English
 Classifier: Programming Language :: Python :: 3
 Classifier: Programming Language :: Python :: 3.9
@@ -20,44 +19,57 @@ Classifier: Programming Language :: Python :: 3.10
 Classifier: Programming Language :: Python :: 3.11
 Classifier: Programming Language :: Python :: 3.12
 Classifier: Programming Language :: Python :: 3.13
+Classifier: Programming Language :: Python :: 3.14
 Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
-Requires-Python: <3.14,>=3.9
+Requires-Python: <3.15,>=3.9
 Description-Content-Type: text/markdown
 License-File: LICENSE
 Requires-Dist: appdirs>=1.3
 Requires-Dist: boto3<2,>=1.28
 Requires-Dist: botocore<2,>=1.31
-Requires-Dist: cloudpickle>=2.1.0
+Requires-Dist: cloudpickle>=2.1.0; python_version < "3.14"
+Requires-Dist: cloudpickle>=3.1.1; python_version >= "3.14"
 Requires-Dist: compress-pickle>=1.2.0
-Requires-Dist: google-cloud-compute>=1.0.0
-Requires-Dist: google-auth>=2.0.0
+Requires-Dist: google-cloud-compute>=1.30.0
+Requires-Dist: google-auth>=2.14.1
 Requires-Dist: humanfriendly>=10.0
 Requires-Dist: numpy>=1.22.2; python_version < "3.10"
 Requires-Dist: numpy>=1.24.0; python_version >= "3.10" and python_version < "3.12"
 Requires-Dist: numpy>=1.26.0; python_version >= "3.12" and python_version < "3.13"
-Requires-Dist: numpy>=2.1.0; python_version >= "3.13"
+Requires-Dist: numpy>=2.1.0; python_version >= "3.13" and python_version < "3.14"
+Requires-Dist: numpy>=2.3.2; python_version >= "3.14"
 Requires-Dist: openpyxl>=3.1.2
-Requires-Dist: pandas<3.0.0,>=1.4.0; python_version < "3.11"
-Requires-Dist: pandas<3.0.0,>=1.5.0; python_version >= "3.11" and python_version < "3.12"
-Requires-Dist: pandas<3.0.0,>=2.1.1; python_version >= "3.12" and python_version < "3.13"
-Requires-Dist: pandas<3.0.0,>=2.2.3; python_version >= "3.13"
-Requires-Dist: psutil>=5.7
+Requires-Dist: pandas<3,>=1.4.0; python_version < "3.11"
+Requires-Dist: pandas<3,>=1.5.0; python_version >= "3.11" and python_version < "3.12"
+Requires-Dist: pandas<3,>=2.1.1; python_version >= "3.12" and python_version < "3.13"
+Requires-Dist: pandas<3,>=2.2.3; python_version >= "3.13" and python_version < "3.14"
+Requires-Dist: pandas<3,>=2.3.3; python_version >= "3.14"
+Requires-Dist: psutil>=5.8
 Requires-Dist: scikit-learn>=1.0.2; python_version < "3.10"
 Requires-Dist: scikit-learn>=1.1.0; python_version >= "3.10" and python_version < "3.11"
 Requires-Dist: scikit-learn>=1.1.3; python_version >= "3.11" and python_version < "3.12"
 Requires-Dist: scikit-learn>=1.3.1; python_version >= "3.12" and python_version < "3.13"
-Requires-Dist: scikit-learn>=1.5.2; python_version >= "3.13"
+Requires-Dist: scikit-learn>=1.5.2; python_version >= "3.13" and python_version < "3.14"
+Requires-Dist: scikit-learn>=1.8.0; python_version >= "3.14"
 Requires-Dist: scipy>=1.7.3; python_version < "3.10"
 Requires-Dist: scipy>=1.9.2; python_version >= "3.10" and python_version < "3.12"
 Requires-Dist: scipy>=1.12.0; python_version >= "3.12" and python_version < "3.13"
-Requires-Dist: scipy>=1.14.1; python_version >= "3.13"
+Requires-Dist: scipy>=1.14.1; python_version >= "3.13" and python_version < "3.14"
+Requires-Dist: scipy>=1.16.1; python_version >= "3.14"
 Requires-Dist: tabulate<0.9,>=0.8.3
-Requires-Dist: torch>=2.6.0
+Requires-Dist: torch>=1.13.0; python_version < "3.11"
+Requires-Dist: torch>=2.0.0; python_version >= "3.11" and python_version < "3.12"
+Requires-Dist: torch>=2.3.0; python_version >= "3.12" and python_version < "3.13"
+Requires-Dist: torch>=2.6.0; python_version >= "3.13" and python_version < "3.14"
+Requires-Dist: torch>=2.9.0; python_version >= "3.14"
 Requires-Dist: tqdm>=4.66.3
 Requires-Dist: XlsxWriter>=1.2.8
-Requires-Dist: rdt>=1.17.0
-Requires-Dist: sdmetrics>=0.20.1
-Requires-Dist: sdv>=1.21.0
+Requires-Dist: rdt>=1.18.2; python_version < "3.14"
+Requires-Dist: rdt>=1.20.0; python_version >= "3.14"
+Requires-Dist: sdmetrics>=0.21.0; python_version < "3.14"
+Requires-Dist: sdmetrics>=0.26.0; python_version >= "3.14"
+Requires-Dist: sdv>=1.21.0; python_version < "3.14"
+Requires-Dist: sdv>=1.33.0; python_version >= "3.14"
 Provides-Extra: dask
 Requires-Dist: dask; extra == "dask"
 Requires-Dist: distributed; extra == "dask"

{sdgym-0.12.2.dev0 → sdgym-0.13.1.dev0}/sdgym.egg-info/requires.txt RENAMED Viewed

@@ -1,20 +1,15 @@
 appdirs>=1.3
 boto3<2,>=1.28
 botocore<2,>=1.31
-cloudpickle>=2.1.0
 compress-pickle>=1.2.0
-google-cloud-compute>=1.0.0
-google-auth>=2.0.0
+google-cloud-compute>=1.30.0
+google-auth>=2.14.1
 humanfriendly>=10.0
 openpyxl>=3.1.2
-psutil>=5.7
+psutil>=5.8
 tabulate<0.9,>=0.8.3
-torch>=2.6.0
 tqdm>=4.66.3
 XlsxWriter>=1.2.8
-rdt>=1.17.0
-sdmetrics>=0.20.1
-sdv>=1.21.0
 [:python_version < "3.10"]
 numpy>=1.22.2
@@ -22,7 +17,14 @@ scikit-learn>=1.0.2
 scipy>=1.7.3
 [:python_version < "3.11"]
-pandas<3.0.0,>=1.4.0
+pandas<3,>=1.4.0
+torch>=1.13.0
+[:python_version < "3.14"]
+cloudpickle>=2.1.0
+rdt>=1.18.2
+sdmetrics>=0.21.0
+sdv>=1.21.0
 [:python_version >= "3.10" and python_version < "3.11"]
 scikit-learn>=1.1.0
@@ -32,20 +34,34 @@ numpy>=1.24.0
 scipy>=1.9.2
 [:python_version >= "3.11" and python_version < "3.12"]
-pandas<3.0.0,>=1.5.0
+pandas<3,>=1.5.0
 scikit-learn>=1.1.3
+torch>=2.0.0
 [:python_version >= "3.12" and python_version < "3.13"]
 numpy>=1.26.0
-pandas<3.0.0,>=2.1.1
+pandas<3,>=2.1.1
 scikit-learn>=1.3.1
 scipy>=1.12.0
+torch>=2.3.0
-[:python_version >= "3.13"]
+[:python_version >= "3.13" and python_version < "3.14"]
 numpy>=2.1.0
-pandas<3.0.0,>=2.2.3
+pandas<3,>=2.2.3
 scikit-learn>=1.5.2
 scipy>=1.14.1
+torch>=2.6.0
+[:python_version >= "3.14"]
+cloudpickle>=3.1.1
+numpy>=2.3.2
+pandas<3,>=2.3.3
+scikit-learn>=1.8.0
+scipy>=1.16.1
+torch>=2.9.0
+rdt>=1.20.0
+sdmetrics>=0.26.0
+sdv>=1.33.0
 [all]
 sdgym[dask,dev,test]

{sdgym-0.12.2.dev0 → sdgym-0.13.1.dev0}/tests/test_tasks.py RENAMED Viewed

@@ -118,7 +118,7 @@ def _get_example_pyproject_dict():
                 ],
             },
             'readme': 'README.md',
-            'requires-python': '>=3.9,<3.13',
+            'requires-python': '>=3.9,<3.15',
         },
         'tool': {
             'bumpversion': {

sdgym-0.12.2.dev0/sdgym/run_benchmark/run_benchmark.py DELETED Viewed

@@ -1,152 +0,0 @@
-"""Script to run a benchmark and upload results to S3."""
-import json
-import os
-from datetime import datetime, timezone
-from botocore.exceptions import ClientError
-from sdgym._benchmark.benchmark import (
-    _benchmark_multi_table_compute_gcp,
-    _benchmark_single_table_compute_gcp,
-)
-from sdgym.run_benchmark.utils import (
-    KEY_DATE_FILE,
-    OUTPUT_DESTINATION_AWS,
-    SYNTHESIZERS_SPLIT_MULTI_TABLE,
-    SYNTHESIZERS_SPLIT_SINGLE_TABLE,
-    _parse_args,
-    get_result_folder_name,
-    post_benchmark_launch_message,
-)
-from sdgym.s3 import get_s3_client, parse_s3_path
-MODALITY_TO_SETUP = {
-    'single_table': {
-        'method': _benchmark_single_table_compute_gcp,
-        'synthesizers_split': SYNTHESIZERS_SPLIT_SINGLE_TABLE,
-        'datasets': [
-            'adult',
-            'alarm',
-            'census',
-            'child',
-            'covtype',
-            'expedia_hotel_logs',
-            'insurance',
-            'intrusion',
-            'news',
-        ],
-    },
-    'multi_table': {
-        'method': _benchmark_multi_table_compute_gcp,
-        'synthesizers_split': SYNTHESIZERS_SPLIT_MULTI_TABLE,
-        'datasets': [
-            'WebKP',
-            'DCG',
-            'UW_std',
-            'Same_gen',
-            'CORA',
-            'got_families',
-            'SalesDB',
-            'UTube',
-            'Student_loan',
-            'Hepatitis_std',
-            'Elti',
-            'Bupa',
-            'Toxicology',
-            'imdb_ijs',
-            'ftp',
-            'imdb_small',
-            'imdb_MovieLens',
-            'Pima',
-            'university',
-            'legalActs',
-            'Dunur',
-            'Mesh',
-            'world',
-            'airbnb-simplified',
-            'trains',
-            'FNHK',
-            'fake_hotels',
-            'SAT',
-            'genes',
-            'Biodegradability',
-            'Pyrimidine',
-            'mutagenesis',
-            'restbase',
-            'Triazine',
-            'Carcinogenesis',
-            'fake_hotels_extended',
-            'Mooney_Family',
-            'PTE',
-            'Facebook',
-            'multi_table_ID_demo_dataset',
-            'SAP',
-            'Chess',
-            'Countries',
-            'NCAA',
-            'Atherosclerosis',
-            'nations',
-            'TubePricing',
-            'financial',
-            'Accidents',
-            'MuskSmall',
-            'NBA',
-            'AustralianFootball',
-            'PremierLeague',
-            'OMOP_CDM_dayz',
-        ],
-    },
-}
-def append_benchmark_run(
-    aws_access_key_id, aws_secret_access_key, date_str, modality='single_table'
-):
-    """Append a new benchmark run to the benchmark dates file in S3."""
-    s3_client = get_s3_client(
-        aws_access_key_id=aws_access_key_id,
-        aws_secret_access_key=aws_secret_access_key,
-    )
-    bucket, prefix = parse_s3_path(OUTPUT_DESTINATION_AWS)
-    try:
-        object = s3_client.get_object(Bucket=bucket, Key=f'{prefix}{modality}/{KEY_DATE_FILE}')
-        body = object['Body'].read().decode('utf-8')
-        data = json.loads(body)
-    except ClientError as e:
-        if e.response['Error']['Code'] == 'NoSuchKey':
-            data = {'runs': []}
-        else:
-            raise RuntimeError(f'Failed to read {KEY_DATE_FILE} from S3: {e}')
-    data['runs'].append({'date': date_str, 'folder_name': get_result_folder_name(date_str)})
-    data['runs'] = sorted(data['runs'], key=lambda x: x['date'])
-    s3_client.put_object(
-        Bucket=bucket,
-        Key=f'{prefix}{modality}/{KEY_DATE_FILE}',
-        Body=json.dumps(data).encode('utf-8'),
-    )
-def main():
-    """Main function to run the benchmark and upload results."""
-    args = _parse_args()
-    aws_access_key_id = os.getenv('AWS_ACCESS_KEY_ID')
-    aws_secret_access_key = os.getenv('AWS_SECRET_ACCESS_KEY')
-    date_str = datetime.now(timezone.utc).strftime('%Y-%m-%d')
-    modality = args.modality
-    for synthesizer_group in MODALITY_TO_SETUP[modality]['synthesizers_split']:
-        MODALITY_TO_SETUP[modality]['method'](
-            output_destination=OUTPUT_DESTINATION_AWS,
-            credential_filepath=os.getenv('CREDENTIALS_FILEPATH'),
-            synthesizers=synthesizer_group,
-            sdv_datasets=MODALITY_TO_SETUP[modality]['datasets'],
-            timeout=345600,  # 4 days
-        )
-    append_benchmark_run(aws_access_key_id, aws_secret_access_key, date_str, modality=modality)
-    post_benchmark_launch_message(date_str, compute_service='GCP', modality=modality)
-if __name__ == '__main__':
-    main()

{sdgym-0.12.2.dev0 → sdgym-0.13.1.dev0}/LICENSE RENAMED Viewed

File without changes

{sdgym-0.12.2.dev0 → sdgym-0.13.1.dev0}/README.md RENAMED Viewed

File without changes

{sdgym-0.12.2.dev0 → sdgym-0.13.1.dev0}/sdgym/_benchmark/__init__.py RENAMED Viewed

File without changes

{sdgym-0.12.2.dev0 → sdgym-0.13.1.dev0}/sdgym/_benchmark/benchmark.py RENAMED Viewed

File without changes

{sdgym-0.12.2.dev0 → sdgym-0.13.1.dev0}/sdgym/_benchmark/config_utils.py RENAMED Viewed

File without changes

{sdgym-0.12.2.dev0 → sdgym-0.13.1.dev0}/sdgym/_dataset_utils.py RENAMED Viewed

File without changes

{sdgym-0.12.2.dev0 → sdgym-0.13.1.dev0}/sdgym/cli/__init__.py RENAMED Viewed

File without changes

{sdgym-0.12.2.dev0 → sdgym-0.13.1.dev0}/sdgym/cli/collect.py RENAMED Viewed

File without changes

{sdgym-0.12.2.dev0 → sdgym-0.13.1.dev0}/sdgym/cli/utils.py RENAMED Viewed

File without changes

{sdgym-0.12.2.dev0 → sdgym-0.13.1.dev0}/sdgym/dataset_explorer.py RENAMED Viewed

File without changes

{sdgym-0.12.2.dev0 → sdgym-0.13.1.dev0}/sdgym/datasets.py RENAMED Viewed

File without changes

{sdgym-0.12.2.dev0 → sdgym-0.13.1.dev0}/sdgym/errors.py RENAMED Viewed

File without changes

{sdgym-0.12.2.dev0 → sdgym-0.13.1.dev0}/sdgym/metrics.py RENAMED Viewed

File without changes

{sdgym-0.12.2.dev0 → sdgym-0.13.1.dev0}/sdgym/progress.py RENAMED Viewed

File without changes

{sdgym-0.12.2.dev0 → sdgym-0.13.1.dev0}/sdgym/result_explorer/__init__.py RENAMED Viewed

File without changes

{sdgym-0.12.2.dev0 → sdgym-0.13.1.dev0}/sdgym/result_explorer/result_explorer.py RENAMED Viewed

File without changes

{sdgym-0.12.2.dev0 → sdgym-0.13.1.dev0}/sdgym/result_explorer/result_handler.py RENAMED Viewed

File without changes

{sdgym-0.12.2.dev0 → sdgym-0.13.1.dev0}/sdgym/run_benchmark/__init__.py RENAMED Viewed

File without changes

{sdgym-0.12.2.dev0 → sdgym-0.13.1.dev0}/sdgym/s3.py RENAMED Viewed

File without changes

{sdgym-0.12.2.dev0 → sdgym-0.13.1.dev0}/sdgym/synthesizer_descriptions.yaml RENAMED Viewed

File without changes

{sdgym-0.12.2.dev0 → sdgym-0.13.1.dev0}/sdgym/synthesizers/__init__.py RENAMED Viewed

File without changes

{sdgym-0.12.2.dev0 → sdgym-0.13.1.dev0}/sdgym/synthesizers/base.py RENAMED Viewed

File without changes

{sdgym-0.12.2.dev0 → sdgym-0.13.1.dev0}/sdgym/synthesizers/column.py RENAMED Viewed

File without changes

{sdgym-0.12.2.dev0 → sdgym-0.13.1.dev0}/sdgym/synthesizers/generate.py RENAMED Viewed

File without changes

{sdgym-0.12.2.dev0 → sdgym-0.13.1.dev0}/sdgym/synthesizers/identity.py RENAMED Viewed

File without changes

{sdgym-0.12.2.dev0 → sdgym-0.13.1.dev0}/sdgym/synthesizers/realtabformer.py RENAMED Viewed

File without changes

{sdgym-0.12.2.dev0 → sdgym-0.13.1.dev0}/sdgym/synthesizers/sdv.py RENAMED Viewed

File without changes

{sdgym-0.12.2.dev0 → sdgym-0.13.1.dev0}/sdgym/synthesizers/uniform.py RENAMED Viewed

File without changes

{sdgym-0.12.2.dev0 → sdgym-0.13.1.dev0}/sdgym/synthesizers/utils.py RENAMED Viewed

File without changes

{sdgym-0.12.2.dev0 → sdgym-0.13.1.dev0}/sdgym.egg-info/SOURCES.txt RENAMED Viewed

File without changes

{sdgym-0.12.2.dev0 → sdgym-0.13.1.dev0}/sdgym.egg-info/dependency_links.txt RENAMED Viewed

File without changes

{sdgym-0.12.2.dev0 → sdgym-0.13.1.dev0}/sdgym.egg-info/entry_points.txt RENAMED Viewed

File without changes

{sdgym-0.12.2.dev0 → sdgym-0.13.1.dev0}/sdgym.egg-info/top_level.txt RENAMED Viewed

File without changes

{sdgym-0.12.2.dev0 → sdgym-0.13.1.dev0}/setup.cfg RENAMED Viewed

File without changes

{sdgym-0.12.2.dev0 → sdgym-0.13.1.dev0}/tests/test_scripts.py RENAMED Viewed

File without changes

sdgym 0.12.2.dev0__tar.gz → 0.13.1.dev0__tar.gz

sdgym 0.12.2.dev0tar.gz → 0.13.1.dev0tar.gz