PyPI - sdgym - Versions diffs - 0.11.1.dev0__tar.gz → 0.11.2.dev0__tar.gz - Mend

sdgym 0.11.1.dev0__tar.gz → 0.11.2.dev0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (45) hide show

{sdgym-0.11.1.dev0/sdgym.egg-info → sdgym-0.11.2.dev0}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: sdgym
-Version: 0.11.1.dev0
+Version: 0.11.2.dev0
 Summary: Benchmark tabular synthetic data generators using a variety of datasets
 Author-email: "DataCebo, Inc." <info@sdv.dev>
 License: BSL-1.1
@@ -194,10 +194,10 @@ Learn more in the [Custom Synthesizers Guide](https://docs.sdv.dev/sdgym/customi
 ## Customizing your datasets
 The SDGym library includes many publicly available datasets that you can include right away.
-List these using the ``get_available_datasets`` feature.
+List these using the ``list_datasets`` feature.
 ```python
-sdgym.get_available_datasets()
+sdgym.dataset_explorer.DatasetExplorer().list_datasets()
 ```
 ```

{sdgym-0.11.1.dev0 → sdgym-0.11.2.dev0}/README.md RENAMED Viewed

@@ -103,10 +103,10 @@ Learn more in the [Custom Synthesizers Guide](https://docs.sdv.dev/sdgym/customi
 ## Customizing your datasets
 The SDGym library includes many publicly available datasets that you can include right away.
-List these using the ``get_available_datasets`` feature.
+List these using the ``list_datasets`` feature.
 ```python
-sdgym.get_available_datasets()
+sdgym.dataset_explorer.DatasetExplorer().list_datasets()
 ```
 ```

{sdgym-0.11.1.dev0 → sdgym-0.11.2.dev0}/pyproject.toml RENAMED Viewed

@@ -144,7 +144,7 @@ namespaces = false
 version = {attr = 'sdgym.__version__'}
 [tool.bumpversion]
-current_version = "0.11.1.dev0"
+current_version = "0.11.2.dev0"
 parse = '(?P<major>\d+)\.(?P<minor>\d+)\.(?P<patch>\d+)(\.(?P<release>[a-z]+)(?P<candidate>\d+))?'
 serialize = [
     '{major}.{minor}.{patch}.{release}{candidate}',

{sdgym-0.11.1.dev0 → sdgym-0.11.2.dev0}/sdgym/__init__.py RENAMED Viewed

@@ -8,16 +8,20 @@ __author__ = 'DataCebo, Inc.'
 __copyright__ = 'Copyright (c) 2022 DataCebo, Inc.'
 __email__ = 'info@sdv.dev'
 __license__ = 'BSL-1.1'
-__version__ = '0.11.1.dev0'
+__version__ = '0.11.2.dev0'
 import logging
-from sdgym.benchmark import benchmark_single_table
+from sdgym.benchmark import benchmark_single_table, benchmark_single_table_aws
 from sdgym.cli.collect import collect_results
 from sdgym.cli.summary import make_summary_spreadsheet
 from sdgym.dataset_explorer import DatasetExplorer
-from sdgym.datasets import get_available_datasets, load_dataset
-from sdgym.synthesizers import create_sdv_synthesizer_variant, create_single_table_synthesizer
+from sdgym.datasets import load_dataset
+from sdgym.synthesizers import (
+    create_synthesizer_variant,
+    create_single_table_synthesizer,
+    create_multi_table_synthesizer,
+)
 from sdgym.result_explorer import ResultsExplorer
 # Clear the logging wrongfully configured by tensorflow/absl
@@ -28,10 +32,11 @@ __all__ = [
     'DatasetExplorer',
     'ResultsExplorer',
     'benchmark_single_table',
+    'benchmark_single_table_aws',
     'collect_results',
-    'create_sdv_synthesizer_variant',
+    'create_synthesizer_variant',
     'create_single_table_synthesizer',
-    'get_available_datasets',
+    'create_multi_table_synthesizer',
     'load_dataset',
     'make_summary_spreadsheet',
 ]

{sdgym-0.11.1.dev0 → sdgym-0.11.2.dev0}/sdgym/benchmark.py RENAMED Viewed

@@ -52,7 +52,7 @@ from sdgym.s3 import (
     write_csv,
     write_file,
 )
-from sdgym.synthesizers import CTGANSynthesizer, GaussianCopulaSynthesizer, UniformSynthesizer
+from sdgym.synthesizers import UniformSynthesizer
 from sdgym.synthesizers.base import BaselineSynthesizer
 from sdgym.utils import (
     calculate_score_time,
@@ -67,7 +67,7 @@ from sdgym.utils import (
 )
 LOGGER = logging.getLogger(__name__)
-DEFAULT_SYNTHESIZERS = [GaussianCopulaSynthesizer, CTGANSynthesizer, UniformSynthesizer]
+DEFAULT_SYNTHESIZERS = ['GaussianCopulaSynthesizer', 'CTGANSynthesizer', 'UniformSynthesizer']
 DEFAULT_DATASETS = [
     'adult',
     'alarm',
@@ -271,7 +271,11 @@ def _generate_job_args_list(
         if additional_datasets_folder is None
         else get_dataset_paths(
             modality='single_table',
-            bucket=additional_datasets_folder,
+            bucket=(
+                additional_datasets_folder
+                if is_s3_path(additional_datasets_folder)
+                else os.path.join(additional_datasets_folder, 'single_table')
+            ),
             aws_access_key_id=aws_access_key_id,
             aws_secret_access_key=aws_secret_access_key_key,
         )
@@ -861,6 +865,7 @@ def _directory_exists(bucket_name, s3_file_path):
 def _check_write_permissions(s3_client, bucket_name):
+    s3_client = s3_client or boto3.client('s3')
     try:
         s3_client.put_object(Bucket=bucket_name, Key='__test__', Body=b'')
         write_permission = True
@@ -881,7 +886,7 @@ def _create_sdgym_script(params, output_filepath):
     bucket_name, key_prefix = parse_s3_path(output_filepath)
     if not _directory_exists(bucket_name, key_prefix):
         raise ValueError(f'Directories in {key_prefix} do not exist')
-    if not _check_write_permissions(bucket_name):
+    if not _check_write_permissions(None, bucket_name):
         raise ValueError('No write permissions allowed for the bucket.')
     # Add quotes to parameter strings
@@ -893,23 +898,22 @@ def _create_sdgym_script(params, output_filepath):
         params['output_filepath'] = "'" + params['output_filepath'] + "'"
     # Generate the output script to run on the e2 instance
-    synthesizer_string = 'synthesizers=['
-    for synthesizer in params['synthesizers']:
+    synthesizers = params.get('synthesizers', [])
+    names = []
+    for synthesizer in synthesizers:
         if isinstance(synthesizer, str):
-            synthesizer_string += synthesizer + ', '
+            names.append(synthesizer)
+        elif hasattr(synthesizer, '__name__'):
+            names.append(synthesizer.__name__)
         else:
-            synthesizer_string += synthesizer.__name__ + ', '
-    if params['synthesizers']:
-        synthesizer_string = synthesizer_string[:-2]
-    synthesizer_string += ']'
+            names.append(synthesizer.__class__.__name__)
+    all_names = '", "'.join(names)
+    synthesizer_string = f'synthesizers=["{all_names}"]'
     # The indentation of the string is important for the python script
     script_content = f"""import boto3
 from io import StringIO
 import sdgym
-from sdgym.synthesizers.sdv import (CopulaGANSynthesizer, CTGANSynthesizer,
-    GaussianCopulaSynthesizer, HMASynthesizer, PARSynthesizer, SDVRelationalSynthesizer,
-    SDVTabularSynthesizer, TVAESynthesizer)
-from sdgym.synthesizers import RealTabFormerSynthesizer
 results = sdgym.benchmark_single_table(
     {synthesizer_string}, custom_synthesizers={params['custom_synthesizers']},
@@ -1186,7 +1190,7 @@ def benchmark_single_table(
         custom_synthesizers (list[class] or ``None``):
             A list of custom synthesizer classes to use. These can be completely custom or
             they can be synthesizer variants (the output from ``create_single_table_synthesizer``
-            or ``create_sdv_synthesizer_variant``). Defaults to ``None``.
+            or ``create_synthesizer_variant``). Defaults to ``None``.
         sdv_datasets (list[str] or ``None``):
             Names of the SDV demo datasets to use for the benchmark. Defaults to
             ``[adult, alarm, census, child, expedia_hotel_logs, insurance, intrusion, news,

{sdgym-0.11.1.dev0 → sdgym-0.11.2.dev0}/sdgym/cli/__main__.py RENAMED Viewed

@@ -97,7 +97,7 @@ def _download_datasets(args):
     _env_setup(args.logfile, args.verbose)
     datasets = args.datasets
     if not datasets:
-        datasets = sdgym.datasets.get_available_datasets(
+        datasets = sdgym.datasets._get_available_datasets(
             args.bucket, args.aws_access_key_id, args.aws_secret_access_key
         )['name']
@@ -118,7 +118,7 @@ def _list_downloaded(args):
 def _list_available(args):
-    datasets = sdgym.datasets.get_available_datasets(
+    datasets = sdgym.datasets._get_available_datasets(
         args.bucket, args.aws_access_key_id, args.aws_secret_access_key
     )
     _print_table(datasets, args.sort, args.reverse, {'size': humanfriendly.format_size})

{sdgym-0.11.1.dev0 → sdgym-0.11.2.dev0}/sdgym/dataset_explorer.py RENAMED Viewed

@@ -275,3 +275,36 @@ class DatasetExplorer:
             dataset_summary.to_csv(output_filepath, index=False)
         return dataset_summary
+    def list_datasets(self, modality, output_filepath=None):
+        """List available datasets for a modality using metainfo only.
+        This is a lightweight alternative to ``summarize_datasets`` that does not load
+        the actual data. It reads dataset information from the ``metainfo.yaml`` files
+        in the bucket and returns a table equivalent to the legacy
+        ``get_available_datasets`` output.
+        Args:
+            modality (str):
+                It must be ``'single_table'``, ``'multi_table'`` or ``'sequential'``.
+            output_filepath (str, optional):
+                Full path to a ``.csv`` file where the resulting table will be written.
+                If not provided, the table is only returned.
+        Returns:
+            pd.DataFrame:
+                A DataFrame with columns: ``['dataset_name', 'size_MB', 'num_tables']``.
+        """
+        self._validate_output_filepath(output_filepath)
+        _validate_modality(modality)
+        dataframe = _get_available_datasets(
+            modality=modality,
+            bucket=self._bucket_name,
+            aws_access_key_id=self.aws_access_key_id,
+            aws_secret_access_key=self.aws_secret_access_key,
+        )
+        if output_filepath:
+            dataframe.to_csv(output_filepath, index=False)
+        return dataframe

{sdgym-0.11.1.dev0 → sdgym-0.11.2.dev0}/sdgym/datasets.py RENAMED Viewed

@@ -254,21 +254,6 @@ def load_dataset(
     return data, metadata_dict
-def get_available_datasets(modality='single_table'):
-    """Get available single_table datasets.
-    Args:
-        modality (str):
-            It must be ``'single_table'``, ``'multi_table'`` or ``'sequential'``.
-    Return:
-        pd.DataFrame:
-            Table of available datasets and their sizes.
-    """
-    _validate_modality(modality)
-    return _get_available_datasets(modality)
 def get_dataset_paths(
     modality,
     datasets=None,

{sdgym-0.11.1.dev0 → sdgym-0.11.2.dev0}/sdgym/result_explorer/result_handler.py RENAMED Viewed

@@ -16,6 +16,7 @@ RESULTS_FOLDER_PREFIX = 'SDGym_results_'
 metainfo_PREFIX = 'metainfo'
 RESULTS_FILE_PREFIX = 'results'
 NUM_DIGITS_DATE = 10
+REGEX_SYNTHESIZER_NAME = r'\s*\(\d+\)\s*$'
 class ResultsHandler(ABC):
@@ -120,7 +121,15 @@ class ResultsHandler(ABC):
     def _process_results(self, results):
         """Process results to ensure they are unique and each dataset has all synthesizers."""
         aggregated_results = pd.concat(results, ignore_index=True)
-        aggregated_results = aggregated_results.drop_duplicates(subset=['Dataset', 'Synthesizer'])
+        aggregated_results['Synthesizer'] = (
+            aggregated_results['Synthesizer']
+            .astype(str)
+            .str.replace(REGEX_SYNTHESIZER_NAME, '', regex=True)
+            .str.strip()
+        )
+        aggregated_results = aggregated_results.drop_duplicates(
+            subset=['Dataset', 'Synthesizer'], keep='first'
+        )
         all_synthesizers = aggregated_results['Synthesizer'].unique()
         dataset_synth_counts = aggregated_results.groupby('Dataset')['Synthesizer'].nunique()
         valid_datasets = dataset_synth_counts[dataset_synth_counts == len(all_synthesizers)].index

sdgym-0.11.2.dev0/sdgym/synthesizers/__init__.py ADDED Viewed

@@ -0,0 +1,32 @@
+"""Synthesizers module."""
+from sdgym.synthesizers.generate import (
+    create_synthesizer_variant,
+    create_single_table_synthesizer,
+    create_multi_table_synthesizer,
+)
+from sdgym.synthesizers.identity import DataIdentity
+from sdgym.synthesizers.column import ColumnSynthesizer
+from sdgym.synthesizers.realtabformer import RealTabFormerSynthesizer
+from sdgym.synthesizers.uniform import UniformSynthesizer
+from sdgym.synthesizers.utils import (
+    get_available_single_table_synthesizers,
+    get_available_multi_table_synthesizers,
+)
+from sdgym.synthesizers.sdv import create_sdv_synthesizer_class, _get_all_sdv_synthesizers
+__all__ = [
+    'DataIdentity',
+    'ColumnSynthesizer',
+    'UniformSynthesizer',
+    'RealTabFormerSynthesizer',
+    'create_single_table_synthesizer',
+    'create_multi_table_synthesizer',
+    'create_synthesizer_variant',
+    'get_available_single_table_synthesizers',
+    'get_available_multi_table_synthesizers',
+]
+for sdv_name in _get_all_sdv_synthesizers():
+    create_sdv_synthesizer_class(sdv_name)

sdgym-0.11.2.dev0/sdgym/synthesizers/base.py ADDED Viewed

@@ -0,0 +1,92 @@
+"""Base classes for synthesizers."""
+import abc
+import logging
+import warnings
+from sdv.metadata import Metadata
+LOGGER = logging.getLogger(__name__)
+class BaselineSynthesizer(abc.ABC):
+    """Base class for all the ``SDGym`` baselines."""
+    _MODEL_KWARGS = {}
+    _NATIVELY_SUPPORTED = True
+    @classmethod
+    def get_subclasses(cls, include_parents=False):
+        """Recursively find subclasses of this Baseline.
+        Args:
+            include_parents (bool):
+                Whether to include subclasses which are parents to
+                other classes. Defaults to ``False``.
+        """
+        subclasses = {}
+        for child in cls.__subclasses__():
+            grandchildren = child.get_subclasses(include_parents)
+            subclasses.update(grandchildren)
+            if include_parents or not grandchildren:
+                subclasses[child.__name__] = child
+        return subclasses
+    @classmethod
+    def _get_supported_synthesizers(cls):
+        """Get the natively supported synthesizer class names."""
+        subclasses = cls.get_subclasses(include_parents=True)
+        synthesizers = set()
+        for name, subclass in subclasses.items():
+            if subclass._NATIVELY_SUPPORTED:
+                synthesizers.add(name)
+        return sorted(synthesizers)
+    @classmethod
+    def get_baselines(cls):
+        """Get baseline classes."""
+        subclasses = cls.get_subclasses(include_parents=True)
+        synthesizers = []
+        for _, subclass in subclasses.items():
+            if abc.ABC not in subclass.__bases__:
+                synthesizers.append(subclass)
+        return synthesizers
+    def get_trained_synthesizer(self, data, metadata):
+        """Get a synthesizer that has been trained on the provided data and metadata.
+        Args:
+            data (pandas.DataFrame):
+                The data to train on.
+            metadata (dict):
+                The metadata dictionary.
+        Returns:
+            obj:
+                The synthesizer object.
+        """
+        metadata_object = Metadata()
+        with warnings.catch_warnings():
+            warnings.simplefilter('ignore', UserWarning)
+            metadata = metadata_object.load_from_dict(metadata)
+        return self._get_trained_synthesizer(data, metadata)
+    def sample_from_synthesizer(self, synthesizer, n_samples):
+        """Sample data from the provided synthesizer.
+        Args:
+            synthesizer (obj):
+                The synthesizer object to sample data from.
+            n_samples (int):
+                The number of samples to create.
+        Returns:
+            pandas.DataFrame or dict:
+                The sampled data. If single-table, should be a DataFrame. If multi-table,
+                should be a dict mapping table name to DataFrame.
+        """
+        return self._sample_from_synthesizer(synthesizer, n_samples)

sdgym-0.11.2.dev0/sdgym/synthesizers/generate.py ADDED Viewed

@@ -0,0 +1,110 @@
+"""Helpers to create SDGym synthesizer variants."""
+from sdgym.synthesizers.base import BaselineSynthesizer
+from sdgym.synthesizers.utils import _get_supported_synthesizers
+def create_synthesizer_variant(display_name, synthesizer_class, synthesizer_parameters):
+    """Create a new synthesizer variant.
+    Args:
+        display_name (str):
+            Name of this synthesizer, used for display purposes in results.
+        synthesizer_class (str):
+            Name of the SDV synthesizer class to wrap.
+        synthesizer_parameters (dict):
+            A dictionary of the parameter names and values that will be used for the synthesizer.
+    Returns:
+        class:
+            The synthesizer class.
+    """
+    if synthesizer_class not in _get_supported_synthesizers():
+        raise ValueError(f"Synthesizer '{synthesizer_class}' is not a SDGym supported synthesizer.")
+    base_class = BaselineSynthesizer.get_subclasses().get(synthesizer_class)
+    NewSynthesizer = type(
+        f'Variant:{display_name}',
+        (base_class,),
+        {
+            '__module__': __name__,
+            '_MODEL_KWARGS': synthesizer_parameters,
+            '_NATIVELY_SUPPORTED': False,
+        },
+    )
+    return NewSynthesizer
+def _create_synthesizer_class(display_name, get_trained_fn, sample_fn, sample_arg_name):
+    """Create a synthesizer class.
+    Args:
+        display_name(string):
+            A string with the name of this synthesizer, used for display purposes only when
+            the results are generated
+        get_trained_synthesizer_fn (callable):
+            A function to generate and train a synthesizer, given the real data and metadata.
+        sample_from_synthesizer (callable):
+            A function to sample from the given synthesizer.
+        sample_arg_name (str):
+            The name of the argument used to specify the number of samples to generate.
+            Either 'num_samples' for single-table synthesizers, or 'scale' for multi-table
+            synthesizers.
+    Returns:
+        class:
+            The synthesizer class.
+    """
+    class_name = f'Custom:{display_name}'
+    def get_trained_synthesizer(self, data, metadata):
+        return get_trained_fn(data, metadata)
+    if sample_arg_name == 'num_samples':
+        def sample_from_synthesizer(self, synthesizer, num_samples):
+            return sample_fn(synthesizer, num_samples)
+    else:
+        def sample_from_synthesizer(self, synthesizer, scale):
+            return sample_fn(synthesizer, scale)
+    CustomSynthesizer = type(
+        class_name,
+        (BaselineSynthesizer,),
+        {
+            '__module__': __name__,
+            '_NATIVELY_SUPPORTED': False,
+            'get_trained_synthesizer': get_trained_synthesizer,
+            'sample_from_synthesizer': sample_from_synthesizer,
+        },
+    )
+    globals()[class_name] = CustomSynthesizer
+    return CustomSynthesizer
+def create_single_table_synthesizer(
+    display_name, get_trained_synthesizer_fn, sample_from_synthesizer_fn
+):
+    """Create a single-table synthesizer class."""
+    return _create_synthesizer_class(
+        display_name,
+        get_trained_synthesizer_fn,
+        sample_from_synthesizer_fn,
+        sample_arg_name='num_samples',
+    )
+def create_multi_table_synthesizer(
+    display_name, get_trained_synthesizer_fn, sample_from_synthesizer_fn
+):
+    """Create a multi-table synthesizer class."""
+    return _create_synthesizer_class(
+        display_name,
+        get_trained_synthesizer_fn,
+        sample_from_synthesizer_fn,
+        sample_arg_name='scale',
+    )

sdgym-0.11.2.dev0/sdgym/synthesizers/sdv.py ADDED Viewed

@@ -0,0 +1,107 @@
+"""SDV synthesizers wrappers for SDGym."""
+import logging
+import sys
+from importlib import import_module
+from sdv import multi_table, single_table
+from sdgym.synthesizers.base import BaselineSynthesizer
+LOGGER = logging.getLogger(__name__)
+UNSUPPORTED_SDV_SYNTHESIZERS = ['DayZSynthesizer']
+MODALITY_TO_MODULE = {
+    'single_table': single_table,
+    'multi_table': multi_table,
+}
+def _validate_modality(modality):
+    """Validate that the modality is correct."""
+    if modality not in ['single_table', 'multi_table']:
+        raise ValueError("`modality` must be one of 'single_table' or 'multi_table'.")
+def _get_sdv_synthesizers(modality):
+    _validate_modality(modality)
+    module = MODALITY_TO_MODULE[modality]
+    available_synthesizer = {name for name, cls in module.__dict__.items() if isinstance(cls, type)}
+    available_synthesizer = available_synthesizer - set(UNSUPPORTED_SDV_SYNTHESIZERS)
+    return sorted(available_synthesizer)
+def _get_all_sdv_synthesizers():
+    """Get all available SDV synthesizers."""
+    synthesizers = set()
+    for modality in MODALITY_TO_MODULE.keys():
+        synthesizers.update(_get_sdv_synthesizers(modality))
+    return sorted(synthesizers)
+def _get_trained_synthesizer(self, data, metadata):
+    LOGGER.info('Fitting %s', self.__class__.__name__)
+    sdv_class = getattr(import_module(f'sdv.{self.modality}'), self.SDV_NAME)
+    synthesizer = sdv_class(metadata=metadata, **self._MODEL_KWARGS)
+    synthesizer.fit(data)
+    return synthesizer
+def _sample_from_synthesizer(self, synthesizer, sample_arg):
+    LOGGER.info('Sampling %s', self.__class__.__name__)
+    if self.modality == 'multi_table':
+        return synthesizer.sample(scale=sample_arg)
+    return synthesizer.sample(num_rows=sample_arg)
+def _retrieve_sdv_class(sdv_name):
+    current_module = sys.modules[__name__]
+    if hasattr(current_module, sdv_name):
+        existing_class = getattr(current_module, sdv_name)
+        if isinstance(existing_class, type):
+            return existing_class
+    return None
+def _get_modality(sdv_name):
+    """Get the modality of a SDV synthesizer."""
+    st_synthesizers = _get_sdv_synthesizers('single_table')
+    if sdv_name in st_synthesizers:
+        return 'single_table'
+    mt_synthesizers = _get_sdv_synthesizers('multi_table')
+    if sdv_name in mt_synthesizers:
+        return 'multi_table'
+    raise ValueError(f"Synthesizer '{sdv_name}' is not a SDV synthesizer.")
+def _create_sdv_class(sdv_name):
+    """Create a SDV synthesizer class dynamically."""
+    current_module = sys.modules[__name__]
+    modality = _get_modality(sdv_name)
+    synthesizer_class = type(
+        sdv_name,
+        (BaselineSynthesizer,),
+        {
+            '__module__': __name__,
+            'SDV_NAME': sdv_name,
+            'modality': modality,
+            '_MODEL_KWARGS': {},
+            '_get_trained_synthesizer': _get_trained_synthesizer,
+            '_sample_from_synthesizer': _sample_from_synthesizer,
+        },
+    )
+    setattr(current_module, sdv_name, synthesizer_class)
+    return synthesizer_class
+def create_sdv_synthesizer_class(sdv_name):
+    """Factory for dynamically creating or retrieving SDV synthesizer classes."""
+    if sdv_name not in _get_all_sdv_synthesizers():
+        raise ValueError(f"Synthesizer '{sdv_name}' is not a supported SDV synthesizer.")
+    return _retrieve_sdv_class(sdv_name) or _create_sdv_class(sdv_name)

sdgym-0.11.2.dev0/sdgym/synthesizers/utils.py ADDED Viewed

@@ -0,0 +1,51 @@
+"""Utility functions for synthesizers in SDGym."""
+from sdgym.synthesizers.base import BaselineSynthesizer
+from sdgym.synthesizers.sdv import _get_all_sdv_synthesizers, _get_sdv_synthesizers
+def _get_sdgym_synthesizers():
+    """Get SDGym synthesizers.
+    Returns:
+        list:
+            A list of available SDGym synthesizer names.
+    """
+    synthesizers = BaselineSynthesizer._get_supported_synthesizers()
+    sdv_synthesizer = _get_all_sdv_synthesizers()
+    sdgym_synthesizer = [
+        synthesizer for synthesizer in synthesizers if synthesizer not in sdv_synthesizer
+    ]
+    return sorted(sdgym_synthesizer)
+def get_available_single_table_synthesizers():
+    """List all available single-table synthesizers in SDGym.
+    Returns:
+        list:
+            A sorted list of available single-table synthesizer names.
+    """
+    sdv_synthesizers = _get_sdv_synthesizers('single_table')
+    sdgym_synthesizers = _get_sdgym_synthesizers()
+    return sorted(sdv_synthesizers + sdgym_synthesizers)
+def get_available_multi_table_synthesizers():
+    """List all available multi-table synthesizers in SDGym.
+    Returns:
+        list:
+            A sorted list of available multi-table synthesizer names.
+    """
+    return sorted(_get_sdv_synthesizers('multi_table'))
+def _get_supported_synthesizers():
+    """Get SDGym supported synthesizers.
+    Returns:
+        list:
+            A list of available SDGym supported synthesizer names.
+    """
+    return BaselineSynthesizer._get_supported_synthesizers()

sdgym 0.11.1.dev0__tar.gz → 0.11.2.dev0__tar.gz

sdgym 0.11.1.dev0__tar.gz → 0.11.2.dev0__tar.gz