PyPI - sdgym - Versions diffs - 0.9.1.dev0__tar.gz → 0.10.0.dev0__tar.gz - Mend

sdgym 0.9.1.dev0.tar.gz → 0.10.0.dev0.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (34)

{sdgym-0.9.1.dev0 → sdgym-0.10.0.dev0}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
-Metadata-Version: 2.1
+Metadata-Version: 2.2
 Name: sdgym
-Version: 0.9.1.dev0
+Version: 0.10.0.dev0
 Summary: Benchmark tabular synthetic data generators using a variety of datasets
 Author-email: "DataCebo, Inc." <info@sdv.dev>
 License: BSL-1.1
@@ -30,9 +30,9 @@ Requires-Dist: botocore<2,>=1.31
 Requires-Dist: cloudpickle>=2.1.0
 Requires-Dist: compress-pickle>=1.2.0
 Requires-Dist: humanfriendly>=8.2
-Requires-Dist: numpy<2.0.0,>=1.21.0; python_version < "3.10"
-Requires-Dist: numpy<2.0.0,>=1.23.3; python_version >= "3.10" and python_version < "3.12"
-Requires-Dist: numpy<2.0.0,>=1.26.0; python_version >= "3.12"
+Requires-Dist: numpy>=1.21.6; python_version < "3.10"
+Requires-Dist: numpy>=1.23.3; python_version >= "3.10" and python_version < "3.12"
+Requires-Dist: numpy>=1.26.0; python_version >= "3.12"
 Requires-Dist: pandas>=1.4.0; python_version < "3.11"
 Requires-Dist: pandas>=1.5.0; python_version >= "3.11" and python_version < "3.12"
 Requires-Dist: pandas>=2.1.1; python_version >= "3.12"
@@ -45,18 +45,23 @@ Requires-Dist: scipy>=1.7.3; python_version < "3.10"
 Requires-Dist: scipy>=1.9.2; python_version >= "3.10" and python_version < "3.12"
 Requires-Dist: scipy>=1.12.0; python_version >= "3.12"
 Requires-Dist: tabulate<0.9,>=0.8.3
-Requires-Dist: torch>=1.9.0; python_version < "3.10"
+Requires-Dist: torch>=1.12.1; python_version < "3.10"
 Requires-Dist: torch>=2.0.0; python_version >= "3.10" and python_version < "3.12"
 Requires-Dist: torch>=2.2.0; python_version >= "3.12"
-Requires-Dist: tqdm>=4.29
+Requires-Dist: tqdm>=4.66.3
 Requires-Dist: XlsxWriter>=1.2.8
-Requires-Dist: rdt>=1.12.1
-Requires-Dist: sdmetrics>=0.14.1
-Requires-Dist: sdv>=1.13.1
+Requires-Dist: rdt>=1.13.1
+Requires-Dist: sdmetrics>=0.17.0
+Requires-Dist: sdv>=1.17.2
 Provides-Extra: dask
 Requires-Dist: dask; extra == "dask"
 Requires-Dist: distributed; extra == "dask"
+Provides-Extra: realtabformer
+Requires-Dist: realtabformer>=0.2.2; extra == "realtabformer"
+Requires-Dist: torch>=2.0.0; (python_version >= "3.8" and python_version < "3.12") and extra == "realtabformer"
+Requires-Dist: torch>=2.2.0; python_version >= "3.12" and extra == "realtabformer"
 Provides-Extra: test
+Requires-Dist: sdgym[realtabformer]; extra == "test"
 Requires-Dist: pytest>=6.2.5; extra == "test"
 Requires-Dist: pytest-cov>=2.6.0; extra == "test"
 Requires-Dist: jupyter<2,>=1.0.0; extra == "test"

{sdgym-0.9.1.dev0 → sdgym-0.10.0.dev0}/pyproject.toml RENAMED Viewed

@@ -27,9 +27,9 @@ dependencies = [
     'cloudpickle>=2.1.0',
     'compress-pickle>=1.2.0',
     'humanfriendly>=8.2',
-    "numpy>=1.21.0,<2.0.0;python_version<'3.10'",
-    "numpy>=1.23.3,<2.0.0;python_version>='3.10' and python_version<'3.12'",
-    "numpy>=1.26.0,<2.0.0;python_version>='3.12'",
+    "numpy>=1.21.6;python_version<'3.10'",
+    "numpy>=1.23.3;python_version>='3.10' and python_version<'3.12'",
+    "numpy>=1.26.0;python_version>='3.12'",
     "pandas>=1.4.0;python_version<'3.11'",
     "pandas>=1.5.0;python_version>='3.11' and python_version<'3.12'",
     "pandas>=2.1.1;python_version>='3.12'",
@@ -42,14 +42,14 @@ dependencies = [
     "scipy>=1.9.2;python_version>='3.10' and python_version<'3.12'",
     "scipy>=1.12.0;python_version>='3.12'",
     'tabulate>=0.8.3,<0.9',
-    "torch>=1.9.0;python_version<'3.10'",
+    "torch>=1.12.1;python_version<'3.10'",
     "torch>=2.0.0;python_version>='3.10' and python_version<'3.12'",
     "torch>=2.2.0;python_version>='3.12'",
-    'tqdm>=4.29',
+    'tqdm>=4.66.3',
     'XlsxWriter>=1.2.8',
-    'rdt>=1.12.1',
-    'sdmetrics>=0.14.1',
-    'sdv>=1.13.1',
+    'rdt>=1.13.1',
+    'sdmetrics>=0.17.0',
+    'sdv>=1.17.2',
 ]
 [project.urls]
@@ -64,7 +64,13 @@ sdgym = { main = 'sdgym.cli.__main__:main' }
 [project.optional-dependencies]
 dask = ['dask', 'distributed']
+realtabformer = [
+    'realtabformer>=0.2.2',
+    "torch>=2.0.0;python_version>='3.8' and python_version<'3.12'",
+    "torch>=2.2.0;python_version>='3.12'",
+]
 test = [
+    'sdgym[realtabformer]',
     'pytest>=6.2.5',
     'pytest-cov>=2.6.0',
     'jupyter>=1.0.0,<2',
@@ -134,7 +140,7 @@ namespaces = false
 version = {attr = 'sdgym.__version__'}
 [tool.bumpversion]
-current_version = "0.9.1.dev0"
+current_version = "0.10.0.dev0"
 parse = '(?P<major>\d+)\.(?P<minor>\d+)\.(?P<patch>\d+)(\.(?P<release>[a-z]+)(?P<candidate>\d+))?'
 serialize = [
     '{major}.{minor}.{patch}.{release}{candidate}',
@@ -198,10 +204,11 @@ select = [
     # print statements
     "T201",
     # pandas-vet
-    "PD"
+    "PD",
+    # numpy 2.0
+    "NPY201"
 ]
 ignore = [
-    "E501",
     # pydocstyle
     "D107",  # Missing docstring in __init__
     "D417",   # Missing argument descriptions in the docstring, this is a bug from pydocstyle: https://github.com/PyCQA/pydocstyle/issues/449
@@ -230,4 +237,4 @@ convention = "google"
 [tool.ruff.lint.pycodestyle]
 max-doc-length = 100
-max-line-length = 100
+max-line-length = 100

{sdgym-0.9.1.dev0 → sdgym-0.10.0.dev0}/sdgym/__init__.py RENAMED Viewed

@@ -8,7 +8,7 @@ __author__ = 'DataCebo, Inc.'
 __copyright__ = 'Copyright (c) 2022 DataCebo, Inc.'
 __email__ = 'info@sdv.dev'
 __license__ = 'BSL-1.1'
-__version__ = '0.9.1.dev0'
+__version__ = '0.10.0.dev0'
 import logging

{sdgym-0.9.1.dev0 → sdgym-0.10.0.dev0}/sdgym/benchmark.py RENAMED Viewed

@@ -66,8 +66,7 @@ N_BYTES_IN_MB = 1000 * 1000
 def _validate_inputs(output_filepath, detailed_results_folder, synthesizers, custom_synthesizers):
     if output_filepath and os.path.exists(output_filepath):
         raise ValueError(
-            f'{output_filepath} already exists. '
-            'Please provide a file that does not already exist.'
+            f'{output_filepath} already exists. Please provide a file that does not already exist.'
         )
     if detailed_results_folder and os.path.exists(detailed_results_folder):
@@ -149,14 +148,14 @@ def _generate_job_args_list(
 def _synthesize(synthesizer_dict, real_data, metadata):
     synthesizer = synthesizer_dict['synthesizer']
     if isinstance(synthesizer, type):
-        assert issubclass(
-            synthesizer, BaselineSynthesizer
-        ), '`synthesizer` must be a synthesizer class'
+        assert issubclass(synthesizer, BaselineSynthesizer), (
+            '`synthesizer` must be a synthesizer class'
+        )
         synthesizer = synthesizer()
     else:
-        assert issubclass(
-            type(synthesizer), BaselineSynthesizer
-        ), '`synthesizer` must be an instance of a synthesizer class.'
+        assert issubclass(type(synthesizer), BaselineSynthesizer), (
+            '`synthesizer` must be an instance of a synthesizer class.'
+        )
     get_synthesizer = synthesizer.get_trained_synthesizer
     sample_from_synthesizer = synthesizer.sample_from_synthesizer
@@ -747,6 +746,7 @@ def benchmark_single_table(
                 - ``CTGANSynthesizer``
                 - ``CopulaGANSynthesizer``
                 - ``TVAESynthesizer``
+                - ``RealTabFormerSynthesizer``
         custom_synthesizers (list[class] or ``None``):
             A list of custom synthesizer classes to use. These can be completely custom or

{sdgym-0.9.1.dev0 → sdgym-0.10.0.dev0}/sdgym/cli/__main__.py RENAMED Viewed

@@ -180,9 +180,9 @@ def _get_parser():
     )
     run.add_argument('-m', '--metrics', nargs='+', help='Metrics to apply. Accepts multiple names.')
     run.add_argument('-b', '--bucket', help='Bucket from which to download the datasets.')
-    run.add_argument('-dp' '--datasets-path', help='Path where datasets can be found.')
+    run.add_argument('-dp--datasets-path', help='Path where datasets can be found.')
     run.add_argument(
-        '-dm' '--modalities', nargs='+', help='Data Modalities to run. Accepts multiple names.'
+        '-dm--modalities', nargs='+', help='Data Modalities to run. Accepts multiple names.'
     )
     run.add_argument('-i', '--iterations', type=int, default=1, help='Number of iterations.')
     run.add_argument(
@@ -219,13 +219,13 @@ def _get_parser():
         '-g', '--groupby', nargs='+', help='Group scores leaderboard by the given fields.'
     )
     run.add_argument(
-        '-ak' '--aws-key',
+        '-ak--aws-key',
         type=str,
         required=False,
         help='Aws access key ID to use when reading datasets.',
     )
     run.add_argument(
-        '-as' '--aws-secret',
+        '-as--aws-secret',
         type=str,
         required=False,
         help='Aws secret access key to use when reading datasets.',
@@ -234,10 +234,10 @@ def _get_parser():
         '-j', '--jobs', type=str, required=False, help='Serialized list of jobs to run.'
     )
     run.add_argument(
-        '-mr' '--max-rows', type=int, help='Cap the number of rows to model from each dataset.'
+        '-mr--max-rows', type=int, help='Cap the number of rows to model from each dataset.'
     )
     run.add_argument(
-        '-mc' '--max-columns',
+        '-mc--max-columns',
         type=int,
         help='Cap the number of columns to model from each dataset.',
     )

{sdgym-0.9.1.dev0 → sdgym-0.10.0.dev0}/sdgym/synthesizers/__init__.py RENAMED Viewed

@@ -9,6 +9,7 @@ from sdgym.synthesizers.generate import (
 )
 from sdgym.synthesizers.identity import DataIdentity
 from sdgym.synthesizers.column import ColumnSynthesizer
+from sdgym.synthesizers.realtabformer import RealTabFormerSynthesizer
 from sdgym.synthesizers.sdv import (
     CopulaGANSynthesizer,
     CTGANSynthesizer,
@@ -38,4 +39,5 @@ __all__ = (
     'create_sdv_synthesizer_variant',
     'create_sequential_synthesizer',
     'SYNTHESIZER_MAPPING',
+    'RealTabFormerSynthesizer',
 )

{sdgym-0.9.1.dev0 → sdgym-0.10.0.dev0}/sdgym/synthesizers/base.py RENAMED Viewed

@@ -2,9 +2,9 @@
 import abc
 import logging
+import warnings
-from sdv.metadata.multi_table import MultiTableMetadata
-from sdv.metadata.single_table import SingleTableMetadata
+from sdv.metadata import Metadata
 LOGGER = logging.getLogger(__name__)
@@ -54,8 +54,11 @@ class BaselineSynthesizer(abc.ABC):
             obj:
                 The synthesizer object.
         """
-        metadata_class = MultiTableMetadata() if 'tables' in metadata else SingleTableMetadata()
-        metadata = metadata_class.load_from_dict(metadata)
+        metadata_object = Metadata()
+        with warnings.catch_warnings():
+            warnings.simplefilter('ignore', UserWarning)
+            metadata = metadata_object.load_from_dict(metadata)
         return self._get_trained_synthesizer(data, metadata)
     def sample_from_synthesizer(self, synthesizer, n_samples):

{sdgym-0.9.1.dev0 → sdgym-0.10.0.dev0}/sdgym/synthesizers/column.py RENAMED Viewed

@@ -1,11 +1,16 @@
 """ColumnSynthesizer module."""
+import logging
 import pandas as pd
 from rdt.hyper_transformer import HyperTransformer
+from sdv.metadata import Metadata
 from sklearn.mixture import GaussianMixture
 from sdgym.synthesizers.base import BaselineSynthesizer
+LOGGER = logging.getLogger(__name__)
 class ColumnSynthesizer(BaselineSynthesizer):
     """Synthesizer that learns each column independently.
@@ -17,6 +22,27 @@ class ColumnSynthesizer(BaselineSynthesizer):
     def _get_trained_synthesizer(self, real_data, metadata):
         hyper_transformer = HyperTransformer()
         hyper_transformer.detect_initial_config(real_data)
+        supported_sdtypes = hyper_transformer._get_supported_sdtypes()
+        config = {}
+        if isinstance(metadata, Metadata):
+            table_name = metadata._get_single_table_name()
+            columns = metadata.tables[table_name].columns
+        else:
+            columns = metadata.columns
+        for column_name, column in columns.items():
+            sdtype = column['sdtype']
+            if sdtype in supported_sdtypes:
+                config[column_name] = sdtype
+            elif column.get('pii', False):
+                config[column_name] = 'pii'
+            else:
+                LOGGER.info(
+                    f'Column {column} sdtype: {sdtype} is not supported, '
+                    f'defaulting to inferred type.'
+                )
+        hyper_transformer.update_sdtypes(config)
         # This is done to match the behavior of the synthesizer for SDGym <= 0.6.0
         columns_to_remove = [

{sdgym-0.9.1.dev0 → sdgym-0.10.0.dev0}/sdgym/synthesizers/generate.py RENAMED Viewed

@@ -49,7 +49,7 @@ def create_sdv_synthesizer_variant(display_name, synthesizer_class, synthesizer_
     if synthesizer_class not in SYNTHESIZER_MAPPING.keys():
         raise ValueError(
             f'Synthesizer class {synthesizer_class} is not recognized. '
-            f"The supported options are {', '.join(SYNTHESIZER_MAPPING.keys())}"
+            f'The supported options are {", ".join(SYNTHESIZER_MAPPING.keys())}'
         )
     baseclass = SDVTabularSynthesizer

sdgym-0.10.0.dev0/sdgym/synthesizers/realtabformer.py ADDED Viewed

@@ -0,0 +1,46 @@
+"""REaLTabFormer integration."""
+import contextlib
+import logging
+from functools import partialmethod
+import tqdm
+from sdgym.synthesizers.base import BaselineSynthesizer
+@contextlib.contextmanager
+def prevent_tqdm_output():
+    """Temporarily disables tqdm m."""
+    tqdm.__init__ = partialmethod(tqdm.__init__, disable=True)
+    try:
+        yield
+    finally:
+        tqdm.__init__ = partialmethod(tqdm.__init__, disable=False)
+class RealTabFormerSynthesizer(BaselineSynthesizer):
+    """Custom wrapper for the REaLTabFormer synthesizer to make it work with SDGym."""
+    LOGGER = logging.getLogger(__name__)
+    _MODEL_KWARGS = None
+    def _get_trained_synthesizer(self, data, metadata):
+        try:
+            from realtabformer import REaLTabFormer
+        except Exception as exception:
+            raise ValueError(
+                "In order to use 'RealTabFormerSynthesizer' you have to install the extra"
+                " dependencies by running  pip install sdgym['realtabformer'] "
+            ) from exception
+        with prevent_tqdm_output():
+            model_kwargs = self._MODEL_KWARGS.copy() if self._MODEL_KWARGS else {}
+            model = REaLTabFormer(model_type='tabular', **model_kwargs)
+            model.fit(data, device='cpu')
+        return model
+    def _sample_from_synthesizer(self, synthesizer, n_sample):
+        """Sample synthetic data with specified sample count."""
+        return synthesizer.sample(n_sample, device='cpu')

{sdgym-0.9.1.dev0 → sdgym-0.10.0.dev0}/sdgym/synthesizers/uniform.py RENAMED Viewed

@@ -19,7 +19,8 @@ class UniformSynthesizer(BaselineSynthesizer):
         hyper_transformer.detect_initial_config(real_data)
         supported_sdtypes = hyper_transformer._get_supported_sdtypes()
         config = {}
-        for column_name, column in metadata.columns.items():
+        table = next(iter(metadata.tables.values()))
+        for column_name, column in table.columns.items():
             sdtype = column['sdtype']
             if sdtype in supported_sdtypes:
                 config[column_name] = sdtype
@@ -27,7 +28,7 @@ class UniformSynthesizer(BaselineSynthesizer):
                 config[column_name] = 'pii'
             else:
                 LOGGER.info(
-                    f'Column {column} sdtype: {sdtype} is not supported, '
+                    f'Column {column_name} sdtype: {sdtype} is not supported, '
                     f'defaulting to inferred type.'
                 )

{sdgym-0.9.1.dev0 → sdgym-0.10.0.dev0}/sdgym.egg-info/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
-Metadata-Version: 2.1
+Metadata-Version: 2.2
 Name: sdgym
-Version: 0.9.1.dev0
+Version: 0.10.0.dev0
 Summary: Benchmark tabular synthetic data generators using a variety of datasets
 Author-email: "DataCebo, Inc." <info@sdv.dev>
 License: BSL-1.1
@@ -30,9 +30,9 @@ Requires-Dist: botocore<2,>=1.31
 Requires-Dist: cloudpickle>=2.1.0
 Requires-Dist: compress-pickle>=1.2.0
 Requires-Dist: humanfriendly>=8.2
-Requires-Dist: numpy<2.0.0,>=1.21.0; python_version < "3.10"
-Requires-Dist: numpy<2.0.0,>=1.23.3; python_version >= "3.10" and python_version < "3.12"
-Requires-Dist: numpy<2.0.0,>=1.26.0; python_version >= "3.12"
+Requires-Dist: numpy>=1.21.6; python_version < "3.10"
+Requires-Dist: numpy>=1.23.3; python_version >= "3.10" and python_version < "3.12"
+Requires-Dist: numpy>=1.26.0; python_version >= "3.12"
 Requires-Dist: pandas>=1.4.0; python_version < "3.11"
 Requires-Dist: pandas>=1.5.0; python_version >= "3.11" and python_version < "3.12"
 Requires-Dist: pandas>=2.1.1; python_version >= "3.12"
@@ -45,18 +45,23 @@ Requires-Dist: scipy>=1.7.3; python_version < "3.10"
 Requires-Dist: scipy>=1.9.2; python_version >= "3.10" and python_version < "3.12"
 Requires-Dist: scipy>=1.12.0; python_version >= "3.12"
 Requires-Dist: tabulate<0.9,>=0.8.3
-Requires-Dist: torch>=1.9.0; python_version < "3.10"
+Requires-Dist: torch>=1.12.1; python_version < "3.10"
 Requires-Dist: torch>=2.0.0; python_version >= "3.10" and python_version < "3.12"
 Requires-Dist: torch>=2.2.0; python_version >= "3.12"
-Requires-Dist: tqdm>=4.29
+Requires-Dist: tqdm>=4.66.3
 Requires-Dist: XlsxWriter>=1.2.8
-Requires-Dist: rdt>=1.12.1
-Requires-Dist: sdmetrics>=0.14.1
-Requires-Dist: sdv>=1.13.1
+Requires-Dist: rdt>=1.13.1
+Requires-Dist: sdmetrics>=0.17.0
+Requires-Dist: sdv>=1.17.2
 Provides-Extra: dask
 Requires-Dist: dask; extra == "dask"
 Requires-Dist: distributed; extra == "dask"
+Provides-Extra: realtabformer
+Requires-Dist: realtabformer>=0.2.2; extra == "realtabformer"
+Requires-Dist: torch>=2.0.0; (python_version >= "3.8" and python_version < "3.12") and extra == "realtabformer"
+Requires-Dist: torch>=2.2.0; python_version >= "3.12" and extra == "realtabformer"
 Provides-Extra: test
+Requires-Dist: sdgym[realtabformer]; extra == "test"
 Requires-Dist: pytest>=6.2.5; extra == "test"
 Requires-Dist: pytest-cov>=2.6.0; extra == "test"
 Requires-Dist: jupyter<2,>=1.0.0; extra == "test"

{sdgym-0.9.1.dev0 → sdgym-0.10.0.dev0}/sdgym.egg-info/SOURCES.txt RENAMED Viewed

@@ -25,6 +25,7 @@ sdgym/synthesizers/base.py
 sdgym/synthesizers/column.py
 sdgym/synthesizers/generate.py
 sdgym/synthesizers/identity.py
+sdgym/synthesizers/realtabformer.py
 sdgym/synthesizers/sdv.py
 sdgym/synthesizers/uniform.py
 tests/test_tasks.py

{sdgym-0.9.1.dev0 → sdgym-0.10.0.dev0}/sdgym.egg-info/requires.txt RENAMED Viewed

@@ -6,17 +6,17 @@ compress-pickle>=1.2.0
 humanfriendly>=8.2
 psutil>=5.7
 tabulate<0.9,>=0.8.3
-tqdm>=4.29
+tqdm>=4.66.3
 XlsxWriter>=1.2.8
-rdt>=1.12.1
-sdmetrics>=0.14.1
-sdv>=1.13.1
+rdt>=1.13.1
+sdmetrics>=0.17.0
+sdv>=1.17.2
 [:python_version < "3.10"]
-numpy<2.0.0,>=1.21.0
+numpy>=1.21.6
 scikit-learn>=1.0.2
 scipy>=1.7.3
-torch>=1.9.0
+torch>=1.12.1
 [:python_version < "3.11"]
 pandas>=1.4.0
@@ -25,7 +25,7 @@ pandas>=1.4.0
 scikit-learn>=1.1.0
 [:python_version >= "3.10" and python_version < "3.12"]
-numpy<2.0.0,>=1.23.3
+numpy>=1.23.3
 scipy>=1.9.2
 torch>=2.0.0
@@ -34,7 +34,7 @@ pandas>=1.5.0
 scikit-learn>=1.1.3
 [:python_version >= "3.12"]
-numpy<2.0.0,>=1.26.0
+numpy>=1.26.0
 pandas>=2.1.1
 scikit-learn>=1.3.1
 scipy>=1.12.0
@@ -61,7 +61,17 @@ tox<5,>=2.9.1
 importlib-metadata>=3.6
 invoke
+[realtabformer]
+realtabformer>=0.2.2
+[realtabformer:python_version >= "3.12"]
+torch>=2.2.0
+[realtabformer:python_version >= "3.8" and python_version < "3.12"]
+torch>=2.0.0
 [test]
+sdgym[realtabformer]
 pytest>=6.2.5
 pytest-cov>=2.6.0
 jupyter<2,>=1.0.0

sdgym-0.10.0.dev0/tests/test_tasks.py ADDED Viewed

@@ -0,0 +1,207 @@
+"""Tests for the ``tasks.py`` file."""
+from tasks import _get_extra_dependencies, _get_minimum_versions, _resolve_version_conflicts
+def test_get_minimum_versions():
+    """Test the ``_get_minimum_versions`` method.
+    The method should return the minimum versions of the dependencies for the given python version.
+    If a library is linked to an URL, the minimum version should be the URL.
+    """
+    # Setup
+    dependencies = [
+        "numpy>=1.20.0,<2;python_version<'3.10'",
+        "numpy>=1.23.3,<2;python_version>='3.10'",
+        "pandas>=1.2.0,<2;python_version<'3.10'",
+        "pandas>=1.3.0,<2;python_version>='3.10'",
+        'humanfriendly>=8.2,<11',
+        'pandas @ git+https://github.com/pandas-dev/pandas.git@master',
+    ]
+    # Run
+    minimum_versions_39 = _get_minimum_versions(dependencies, '3.9')
+    minimum_versions_310 = _get_minimum_versions(dependencies, '3.10')
+    # Assert
+    expected_versions_39 = {
+        'numpy': 'numpy==1.20.0',
+        'pandas': 'git+https://github.com/pandas-dev/pandas.git@master#egg=pandas',
+        'humanfriendly': 'humanfriendly==8.2',
+    }
+    expected_versions_310 = {
+        'numpy': 'numpy==1.23.3',
+        'pandas': 'git+https://github.com/pandas-dev/pandas.git@master#egg=pandas',
+        'humanfriendly': 'humanfriendly==8.2',
+    }
+    assert minimum_versions_39 == expected_versions_39
+    assert minimum_versions_310 == expected_versions_310
+def _get_example_pyproject_dict():
+    return {
+        'build-system': {
+            'build-backend': 'setuptools.build_meta',
+            'requires': ['setuptools', 'wheel'],
+        },
+        'project': {
+            'authors': [{'email': 'info@sdv.dev', 'name': 'DataCebo, Inc.'}],
+            'classifiers': [
+                'Intended Audience :: Developers',
+                'License :: Free for non-commercial use',
+                'Natural Language :: English',
+                'Programming Language :: Python :: 3.10',
+                'Programming Language :: Python :: 3.11',
+                'Programming Language :: Python :: 3.12',
+                'Topic :: Scientific/Engineering :: Artificial Intelligence',
+            ],
+            'dependencies': [
+                'appdirs>=1.3',
+                'boto3>=1.28,<2',
+                'botocore>=1.31,<2',
+                'cloudpickle>=2.1.0',
+                'compress-pickle>=1.2.0',
+                'humanfriendly>=8.2',
+                "numpy>=1.21.6;python_version<'3.10'",
+                "numpy>=1.23.3;python_version>='3.10' and python_version<'3.12'",
+                "numpy>=1.26.0;python_version>='3.12'",
+                "pandas>=1.4.0;python_version<'3.11'",
+                "pandas>=1.5.0;python_version>='3.11' and python_version<'3.12'",
+                "pandas>=2.1.1;python_version>='3.12'",
+                'psutil>=5.7',
+                "scikit-learn>=1.0.2;python_version<'3.10'",
+                "scikit-learn>=1.1.0;python_version>='3.10' and python_version<'3.11'",
+                "scikit-learn>=1.1.3;python_version>='3.11' and python_version<'3.12'",
+                "scikit-learn>=1.3.1;python_version>='3.12'",
+                "scipy>=1.7.3;python_version<'3.10'",
+                "scipy>=1.9.2;python_version>='3.10' and python_version<'3.12'",
+                "scipy>=1.12.0;python_version>='3.12'",
+                'tabulate>=0.8.3,<0.9',
+                "torch>=1.12.1;python_version<'3.10'",
+                "torch>=2.0.0;python_version>='3.10' and python_version<'3.12'",
+                "torch>=2.2.0;python_version>='3.12'",
+                'tqdm>=4.66.3',
+                'XlsxWriter>=1.2.8',
+                'rdt>=1.13.1',
+                'sdmetrics>=0.17.0',
+                'sdv>=1.17.2',
+            ],
+            'dynamic': ['version'],
+            'license': {'text': 'BSL-1.1'},
+            'name': 'sdgym',
+            'optional-dependencies': {
+                'all': ['sdgym[dask, test, dev]'],
+                'dask': ['dask', 'distributed'],
+                'dev': [
+                    'sdgym[dask, test]',
+                    'build>=1.0.0,<2',
+                    'bump-my-version>=0.18.3,<1',
+                    'pip>=9.0.1',
+                    'watchdog>=1.0.1,<5',
+                    'ruff>=0.4.5,<1',
+                    'twine>=1.10.0,<6',
+                    'wheel>=0.30.0',
+                    'coverage>=4.5.12,<8',
+                    'tox>=2.9.1,<5',
+                    'importlib-metadata>=3.6',
+                    'invoke',
+                ],
+                'realtabformer': ['realtabformer>=0.2.1'],
+                'test': [
+                    'sdgym[realtabformer]',
+                    'pytest>=6.2.5',
+                ],
+            },
+            'readme': 'README.md',
+            'requires-python': '>=3.8,<3.13',
+        },
+        'tool': {
+            'bumpversion': {
+                'allow_dirty': False,
+                'commit': True,
+                'commit_args': '',
+            },
+            'ruff': {
+                'exclude': [
+                    'docs',
+                    '.tox',
+                    '.git',
+                    '__pycache__',
+                    '.ipynb_checkpoints',
+                    'tasks.py',
+                ],
+                'indent-width': 4,
+            },
+        },
+    }
+def test__get_extra_dependencies():
+    """Test that the proper dependency strings are extracted from the pyproject dictionary."""
+    # Setup
+    pyproject_dict = _get_example_pyproject_dict()
+    # Run
+    extra_dependencies = _get_extra_dependencies(pyproject_dict)
+    # Assert
+    assert extra_dependencies == ['realtabformer>=0.2.1']
+def test__resolve_version_conflicts_conflicting_versions():
+    """Test that any conflicts for the same dependency are resolved to the higher version."""
+    # Setup
+    deps = {
+        'numpy': 'numpy==2.0.1',
+        'pandas': 'pandas==2.2.1',
+        'sdv': 'sdv==2.1.1',
+        'rdt': 'rdt==1.1.2',
+    }
+    extra_deps = {
+        'numpy': 'numpy==2.0.0',
+        'pandas': 'pandas==2.3.0',
+        'sdv': 'sdv==3.0.0',
+        'copulas': 'copulas==0.12.0',
+    }
+    # Run
+    versions = _resolve_version_conflicts(deps, extra_deps)
+    # Assert
+    assert sorted(versions) == sorted([
+        'numpy==2.0.1',
+        'pandas==2.3.0',
+        'sdv==3.0.0',
+        'rdt==1.1.2',
+        'copulas==0.12.0',
+    ])
+def test__resolve_version_conflicts_pointing_to_branch():
+    """Test specific branches are always selected over normal version numbers."""
+    # Setup
+    deps = {
+        'numpy': 'git+https://github.com/numpy-dev/numpy.git@master#egg=numpy',
+        'pandas': 'pandas==2.2.1',
+        'sdv': 'sdv==2.1.1',
+        'rdt': 'rdt==1.1.2',
+    }
+    extra_deps = {
+        'numpy': 'numpy==2.0.0',
+        'pandas': 'git+https://github.com/pandas-dev/pandas.git@master#egg=pandas',
+        'sdv': 'sdv==3.0.0',
+        'copulas': 'copulas==0.12.0',
+    }
+    # Run
+    versions = _resolve_version_conflicts(deps, extra_deps)
+    # Assert
+    assert sorted(versions) == sorted([
+        'git+https://github.com/numpy-dev/numpy.git@master#egg=numpy',
+        'git+https://github.com/pandas-dev/pandas.git@master#egg=pandas',
+        'sdv==3.0.0',
+        'rdt==1.1.2',
+        'copulas==0.12.0',
+    ])

sdgym-0.9.1.dev0/tests/test_tasks.py DELETED Viewed

@@ -1,39 +0,0 @@
-"""Tests for the ``tasks.py`` file."""
-from tasks import _get_minimum_versions
-def test_get_minimum_versions():
-    """Test the ``_get_minimum_versions`` method.
-    The method should return the minimum versions of the dependencies for the given python version.
-    If a library is linked to an URL, the minimum version should be the URL.
-    """
-    # Setup
-    dependencies = [
-        "numpy>=1.20.0,<2;python_version<'3.10'",
-        "numpy>=1.23.3,<2;python_version>='3.10'",
-        "pandas>=1.2.0,<2;python_version<'3.10'",
-        "pandas>=1.3.0,<2;python_version>='3.10'",
-        'humanfriendly>=8.2,<11',
-        'pandas @ git+https://github.com/pandas-dev/pandas.git@master',
-    ]
-    # Run
-    minimum_versions_39 = _get_minimum_versions(dependencies, '3.9')
-    minimum_versions_310 = _get_minimum_versions(dependencies, '3.10')
-    # Assert
-    expected_versions_39 = [
-        'numpy==1.20.0',
-        'git+https://github.com/pandas-dev/pandas.git@master#egg=pandas',
-        'humanfriendly==8.2',
-    ]
-    expected_versions_310 = [
-        'numpy==1.23.3',
-        'git+https://github.com/pandas-dev/pandas.git@master#egg=pandas',
-        'humanfriendly==8.2',
-    ]
-    assert minimum_versions_39 == expected_versions_39
-    assert minimum_versions_310 == expected_versions_310