data-designer 0.1.1__py3-none-any.whl → 0.1.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data_designer/_version.py +2 -2
- data_designer/config/column_configs.py +29 -4
- data_designer/config/datastore.py +70 -34
- data_designer/config/default_model_settings.py +1 -1
- data_designer/config/sampler_params.py +16 -5
- data_designer/engine/dataset_builders/artifact_storage.py +15 -1
- data_designer/engine/dataset_builders/column_wise_builder.py +2 -2
- data_designer/engine/sampling_gen/entities/dataset_based_person_fields.py +31 -9
- data_designer/interface/data_designer.py +7 -3
- {data_designer-0.1.1.dist-info → data_designer-0.1.3.dist-info}/METADATA +3 -6
- {data_designer-0.1.1.dist-info → data_designer-0.1.3.dist-info}/RECORD +14 -14
- {data_designer-0.1.1.dist-info → data_designer-0.1.3.dist-info}/WHEEL +1 -1
- {data_designer-0.1.1.dist-info → data_designer-0.1.3.dist-info}/entry_points.txt +0 -0
- {data_designer-0.1.1.dist-info → data_designer-0.1.3.dist-info}/licenses/LICENSE +0 -0
data_designer/_version.py
CHANGED
@@ -28,7 +28,7 @@ version_tuple: VERSION_TUPLE
 commit_id: COMMIT_ID
 __commit_id__: COMMIT_ID
 
-__version__ = version = '0.1.1'
-__version_tuple__ = version_tuple = (0, 1, 1)
+__version__ = version = '0.1.3'
+__version_tuple__ = version_tuple = (0, 1, 3)
 
 __commit_id__ = commit_id = None
data_designer/config/column_configs.py
CHANGED
@@ -2,9 +2,9 @@
 # SPDX-License-Identifier: Apache-2.0
 
 from abc import ABC
-from typing import Literal, Optional, Type, Union
+from typing import Annotated, Literal, Optional, Type, Union
 
-from pydantic import BaseModel, Field, model_validator
+from pydantic import BaseModel, Discriminator, Field, model_validator
 from typing_extensions import Self
 
 from .base import ConfigBase
@@ -89,11 +89,36 @@ class SamplerColumnConfig(SingleColumnConfig):
     """
 
     sampler_type: SamplerType
-    params: SamplerParamsT
-    conditional_params: dict[str, SamplerParamsT] = {}
+    params: Annotated[SamplerParamsT, Discriminator("sampler_type")]
+    conditional_params: dict[str, Annotated[SamplerParamsT, Discriminator("sampler_type")]] = {}
     convert_to: Optional[str] = None
     column_type: Literal["sampler"] = "sampler"
 
+    @model_validator(mode="before")
+    @classmethod
+    def inject_sampler_type_into_params(cls, data: dict) -> dict:
+        """Inject sampler_type into params dict to enable discriminated union resolution.
+
+        This allows users to pass params as a simple dict without the sampler_type field,
+        which will be automatically added based on the outer sampler_type field.
+        """
+        if isinstance(data, dict):
+            sampler_type = data.get("sampler_type")
+            params = data.get("params")
+
+            # If params is a dict and doesn't have sampler_type, inject it
+            if sampler_type and isinstance(params, dict) and "sampler_type" not in params:
+                data["params"] = {"sampler_type": sampler_type, **params}
+
+            # Handle conditional_params similarly
+            conditional_params = data.get("conditional_params")
+            if conditional_params and isinstance(conditional_params, dict):
+                for condition, cond_params in conditional_params.items():
+                    if isinstance(cond_params, dict) and "sampler_type" not in cond_params:
+                        data["conditional_params"][condition] = {"sampler_type": sampler_type, **cond_params}
+
+        return data
+
 
 class LLMTextColumnConfig(SingleColumnConfig):
     """Configuration for text generation columns using Large Language Models.
data_designer/config/datastore.py
CHANGED
@@ -31,34 +31,37 @@ class DatastoreSettings(BaseModel):
     token: Optional[str] = Field(default=None, description="If needed, token to use for authentication.")
 
 
-def get_file_column_names(file_path: Union[str, Path], file_type: str) -> list[str]:
-    """
-
-
-
-
-        raise InvalidFilePathError(f"🛑 No files found matching pattern: {str(file_path)!r}")
-    logger.debug(f"0️⃣ Using the first matching file in {str(file_path)!r} to determine column names in seed dataset")
-    file_path = matching_files[0]
+def get_file_column_names(file_reference: Union[str, Path, HfFileSystem], file_type: str) -> list[str]:
+    """Get column names from a dataset file.
+
+    Args:
+        file_reference: Path to the dataset file, or an HfFileSystem object.
+        file_type: Type of the dataset file. Must be one of: 'parquet', 'json', 'jsonl', 'csv'.
 
+    Raises:
+        InvalidFilePathError: If the file type is not supported.
+
+    Returns:
+        List of column names.
+    """
     if file_type == "parquet":
         try:
-            schema = pq.read_schema(file_path)
+            schema = pq.read_schema(file_reference)
             if hasattr(schema, "names"):
                 return schema.names
             else:
                 return [field.name for field in schema]
         except Exception as e:
-            logger.warning(f"Failed to process parquet file {file_path}: {e}")
+            logger.warning(f"Failed to process parquet file {file_reference}: {e}")
             return []
     elif file_type in ["json", "jsonl"]:
-        return pd.read_json(file_path, orient="records", lines=True, nrows=1).columns.tolist()
+        return pd.read_json(file_reference, orient="records", lines=True, nrows=1).columns.tolist()
     elif file_type == "csv":
        try:
-            df = pd.read_csv(file_path, nrows=1)
+            df = pd.read_csv(file_reference, nrows=1)
             return df.columns.tolist()
         except (pd.errors.EmptyDataError, pd.errors.ParserError) as e:
-            logger.warning(f"Failed to process CSV file {file_path}: {e}")
+            logger.warning(f"Failed to process CSV file {file_reference}: {e}")
             return []
     else:
         raise InvalidFilePathError(f"🛑 Unsupported file type: {file_type!r}")
@@ -66,12 +69,36 @@ def get_file_column_names(file_path: Union[str, Path], file_type: str) -> list[str]:
 
 def fetch_seed_dataset_column_names(seed_dataset_reference: SeedDatasetReference) -> list[str]:
     if hasattr(seed_dataset_reference, "datastore_settings"):
-        return
+        return fetch_seed_dataset_column_names_from_datastore(
             seed_dataset_reference.repo_id,
             seed_dataset_reference.filename,
             seed_dataset_reference.datastore_settings,
         )
-    return
+    return fetch_seed_dataset_column_names_from_local_file(seed_dataset_reference.dataset)
+
+
+def fetch_seed_dataset_column_names_from_datastore(
+    repo_id: str,
+    filename: str,
+    datastore_settings: Optional[Union[DatastoreSettings, dict]] = None,
+) -> list[str]:
+    file_type = filename.split(".")[-1]
+    if f".{file_type}" not in VALID_DATASET_FILE_EXTENSIONS:
+        raise InvalidFileFormatError(f"🛑 Unsupported file type: {filename!r}")
+
+    datastore_settings = resolve_datastore_settings(datastore_settings)
+    fs = HfFileSystem(endpoint=datastore_settings.endpoint, token=datastore_settings.token, skip_instance_cache=True)
+
+    file_path = _extract_single_file_path_from_glob_pattern_if_present(f"datasets/{repo_id}/{filename}", fs=fs)
+
+    with fs.open(file_path) as f:
+        return get_file_column_names(f, file_type)
+
+
+def fetch_seed_dataset_column_names_from_local_file(dataset_path: str | Path) -> list[str]:
+    dataset_path = _validate_dataset_path(dataset_path, allow_glob_pattern=True)
+    dataset_path = _extract_single_file_path_from_glob_pattern_if_present(dataset_path)
+    return get_file_column_names(dataset_path, str(dataset_path).split(".")[-1])
 
 
 def resolve_datastore_settings(datastore_settings: DatastoreSettings | dict | None) -> DatastoreSettings:
@@ -114,25 +141,34 @@ def upload_to_hf_hub(
     return f"{repo_id}/{filename}"
 
 
-def
-
-
-
-
-    file_type = filename.split(".")[-1]
-    if f".{file_type}" not in VALID_DATASET_FILE_EXTENSIONS:
-        raise InvalidFileFormatError(f"🛑 Unsupported file type: {filename!r}")
-
-    datastore_settings = resolve_datastore_settings(datastore_settings)
-    fs = HfFileSystem(endpoint=datastore_settings.endpoint, token=datastore_settings.token, skip_instance_cache=True)
-
-    with fs.open(f"datasets/{repo_id}/{filename}") as f:
-        return get_file_column_names(f, file_type)
-
-
-
-
+def _extract_single_file_path_from_glob_pattern_if_present(
+    file_path: str | Path,
+    fs: HfFileSystem | None = None,
+) -> Path:
+    file_path = Path(file_path)
 
+    # no glob pattern
+    if "*" not in str(file_path):
+        return file_path
+
+    # glob pattern with HfFileSystem
+    if fs is not None:
+        file_to_check = None
+        file_extension = file_path.name.split(".")[-1]
+        for file in fs.ls(str(file_path.parent)):
+            filename = file["name"]
+            if filename.endswith(f".{file_extension}"):
+                file_to_check = filename
+        if file_to_check is None:
+            raise InvalidFilePathError(f"🛑 No files found matching pattern: {str(file_path)!r}")
+        logger.debug(f"Using the first matching file in {str(file_path)!r} to determine column names in seed dataset")
+        return Path(file_to_check)
+
+    # glob pattern with local file system
+    if not (matching_files := sorted(file_path.parent.glob(file_path.name))):
+        raise InvalidFilePathError(f"🛑 No files found matching pattern: {str(file_path)!r}")
+    logger.debug(f"Using the first matching file in {str(file_path)!r} to determine column names in seed dataset")
+    return matching_files[0]
 
 
 def _validate_dataset_path(dataset_path: Union[str, Path], allow_glob_pattern: bool = False) -> Path:
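For local seed datasets, the new `_extract_single_file_path_from_glob_pattern_if_present` narrows a glob pattern to a single concrete file, the first match in sorted order, before column names are read. A self-contained sketch of that local-path branch (function name, error type, and paths are illustrative):

```python
from pathlib import Path

def first_glob_match(file_path: str | Path) -> Path:
    """Resolve a pattern like 'seeds/*.parquet' to its first match, in sorted order."""
    file_path = Path(file_path)
    if "*" not in str(file_path):
        return file_path  # already a concrete path, nothing to resolve
    if not (matches := sorted(file_path.parent.glob(file_path.name))):
        raise FileNotFoundError(f"No files found matching pattern: {file_path}")
    return matches[0]

# e.g. first_glob_match("seeds/*.csv") might return Path("seeds/part-000.csv")
```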
data_designer/config/default_model_settings.py
CHANGED
@@ -78,7 +78,7 @@ def get_default_model_configs() -> list[ModelConfig]:
     return []
 
 
-def get_defaul_model_providers_missing_api_keys() -> list[str]:
+def get_default_model_providers_missing_api_keys() -> list[str]:
     missing_api_keys = []
     for predefined_provider in PREDEFINED_PROVIDERS:
         if os.environ.get(predefined_provider["api_key"]) is None:
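Beyond the rename (fixing the `get_defaul_...` typo, matched by the import fix in data_designer.py below), the visible logic scans PREDEFINED_PROVIDERS for unset API-key environment variables. A hedged sketch of that check; the provider entries and the value collected are assumptions, not the package's exact data:

```python
import os

# Illustrative provider registry; per the loop in the diff, each real
# PREDEFINED_PROVIDERS entry at least carries an "api_key" env-var name.
PREDEFINED_PROVIDERS = [
    {"name": "nvidia", "api_key": "NVIDIA_API_KEY"},
    {"name": "openai", "api_key": "OPENAI_API_KEY"},
]

def get_default_model_providers_missing_api_keys() -> list[str]:
    missing_api_keys = []
    for predefined_provider in PREDEFINED_PROVIDERS:
        if os.environ.get(predefined_provider["api_key"]) is None:
            missing_api_keys.append(predefined_provider["api_key"])
    return missing_api_keys
```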
data_designer/config/sampler_params.py
CHANGED
@@ -66,6 +66,7 @@ class CategorySamplerParams(ConfigBase):
             "Larger values will be sampled with higher probability."
         ),
     )
+    sampler_type: Literal[SamplerType.CATEGORY] = SamplerType.CATEGORY
 
     @model_validator(mode="after")
     def _normalize_weights_if_needed(self) -> Self:
@@ -106,6 +107,7 @@ class DatetimeSamplerParams(ConfigBase):
         default="D",
         description="Sampling units, e.g. the smallest possible time interval between samples.",
     )
+    sampler_type: Literal[SamplerType.DATETIME] = SamplerType.DATETIME
 
     @field_validator("start", "end")
     @classmethod
@@ -136,6 +138,7 @@ class SubcategorySamplerParams(ConfigBase):
         ...,
         description="Mapping from each value of parent category to a list of subcategory values.",
     )
+    sampler_type: Literal[SamplerType.SUBCATEGORY] = SamplerType.SUBCATEGORY
 
 
 class TimeDeltaSamplerParams(ConfigBase):
@@ -187,6 +190,7 @@ class TimeDeltaSamplerParams(ConfigBase):
         default="D",
         description="Sampling units, e.g. the smallest possible time interval between samples.",
     )
+    sampler_type: Literal[SamplerType.TIMEDELTA] = SamplerType.TIMEDELTA
 
     @model_validator(mode="after")
     def _validate_min_less_than_max(self) -> Self:
@@ -219,6 +223,7 @@ class UUIDSamplerParams(ConfigBase):
         default=False,
         description="If true, all letters in the UUID will be capitalized.",
     )
+    sampler_type: Literal[SamplerType.UUID] = SamplerType.UUID
 
     @property
     def last_index(self) -> int:
@@ -257,6 +262,7 @@ class ScipySamplerParams(ConfigBase):
     decimal_places: Optional[int] = Field(
         default=None, description="Number of decimal places to round the sampled values to."
     )
+    sampler_type: Literal[SamplerType.SCIPY] = SamplerType.SCIPY
 
 
 class BinomialSamplerParams(ConfigBase):
@@ -273,6 +279,7 @@ class BinomialSamplerParams(ConfigBase):
 
     n: int = Field(..., description="Number of trials.")
     p: float = Field(..., description="Probability of success on each trial.", ge=0.0, le=1.0)
+    sampler_type: Literal[SamplerType.BINOMIAL] = SamplerType.BINOMIAL
 
 
 class BernoulliSamplerParams(ConfigBase):
@@ -288,6 +295,7 @@ class BernoulliSamplerParams(ConfigBase):
     """
 
     p: float = Field(..., description="Probability of success.", ge=0.0, le=1.0)
+    sampler_type: Literal[SamplerType.BERNOULLI] = SamplerType.BERNOULLI
 
 
 class BernoulliMixtureSamplerParams(ConfigBase):
@@ -327,6 +335,7 @@ class BernoulliMixtureSamplerParams(ConfigBase):
         ...,
         description="Parameters of the scipy.stats distribution given in `dist_name`.",
     )
+    sampler_type: Literal[SamplerType.BERNOULLI_MIXTURE] = SamplerType.BERNOULLI_MIXTURE
 
 
 class GaussianSamplerParams(ConfigBase):
@@ -350,6 +359,7 @@ class GaussianSamplerParams(ConfigBase):
     decimal_places: Optional[int] = Field(
         default=None, description="Number of decimal places to round the sampled values to."
     )
+    sampler_type: Literal[SamplerType.GAUSSIAN] = SamplerType.GAUSSIAN
 
 
 class PoissonSamplerParams(ConfigBase):
@@ -369,6 +379,7 @@ class PoissonSamplerParams(ConfigBase):
     """
 
     mean: float = Field(..., description="Mean number of events in a fixed interval.")
+    sampler_type: Literal[SamplerType.POISSON] = SamplerType.POISSON
 
 
 class UniformSamplerParams(ConfigBase):
@@ -390,6 +401,7 @@ class UniformSamplerParams(ConfigBase):
     decimal_places: Optional[int] = Field(
         default=None, description="Number of decimal places to round the sampled values to."
     )
+    sampler_type: Literal[SamplerType.UNIFORM] = SamplerType.UNIFORM
 
 
 #########################################
@@ -418,9 +430,6 @@ class PersonSamplerParams(ConfigBase):
         age_range: Two-element list [min_age, max_age] specifying the age range to sample from
             (inclusive). Defaults to a standard age range. Both values must be between minimum and
             maximum allowed ages.
-        state: Only supported for "en_US" locale. Filters to sample people from specified US state(s).
-            Must be provided as two-letter state abbreviations (e.g., "CA", "NY", "TX"). Can be a
-            single state or a list of states.
         with_synthetic_personas: If True, appends additional synthetic persona columns including
             personality traits, interests, and background descriptions. Only supported for certain
             locales with managed datasets.
@@ -470,11 +479,12 @@ class PersonSamplerParams(ConfigBase):
         default=False,
         description="If True, then append synthetic persona columns to each generated person.",
     )
+    sampler_type: Literal[SamplerType.PERSON] = SamplerType.PERSON
 
     @property
     def generator_kwargs(self) -> list[str]:
         """Keyword arguments to pass to the person generator."""
-        return [f for f in list(PersonSamplerParams.model_fields) if f != "locale"]
+        return [f for f in list(PersonSamplerParams.model_fields) if f not in ("locale", "sampler_type")]
 
     @property
     def people_gen_key(self) -> str:
@@ -533,11 +543,12 @@ class PersonFromFakerSamplerParams(ConfigBase):
         min_length=2,
         max_length=2,
     )
+    sampler_type: Literal[SamplerType.PERSON_FROM_FAKER] = SamplerType.PERSON_FROM_FAKER
 
     @property
     def generator_kwargs(self) -> list[str]:
         """Keyword arguments to pass to the person generator."""
-        return [f for f in list(PersonFromFakerSamplerParams.model_fields) if f != "locale"]
+        return [f for f in list(PersonFromFakerSamplerParams.model_fields) if f not in ("locale", "sampler_type")]
 
     @property
     def people_gen_key(self) -> str:
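With every params class now carrying a `sampler_type` Literal tag, a serialized params payload is self-describing and validates straight into the right class. A small runnable sketch with stand-in classes (the real union spans all the samplers above):

```python
from typing import Annotated, Literal, Union

from pydantic import BaseModel, Discriminator, Field, TypeAdapter

class BinomialParams(BaseModel):
    sampler_type: Literal["binomial"] = "binomial"
    n: int = Field(..., description="Number of trials.")
    p: float = Field(..., description="Probability of success on each trial.", ge=0.0, le=1.0)

class PoissonParams(BaseModel):
    sampler_type: Literal["poisson"] = "poisson"
    mean: float = Field(..., description="Mean number of events in a fixed interval.")

# The tag doubles as the discriminator, so a plain dict resolves unambiguously.
adapter = TypeAdapter(Annotated[Union[BinomialParams, PoissonParams], Discriminator("sampler_type")])

params = adapter.validate_python({"sampler_type": "poisson", "mean": 3.5})
assert isinstance(params, PoissonParams)
assert adapter.dump_python(params)["sampler_type"] == "poisson"  # round-trips with the tag
```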
data_designer/engine/dataset_builders/artifact_storage.py
CHANGED
@@ -1,6 +1,8 @@
 # SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 # SPDX-License-Identifier: Apache-2.0
 
+from datetime import datetime
+from functools import cached_property
 import json
 import logging
 from pathlib import Path
@@ -36,9 +38,21 @@ class ArtifactStorage(BaseModel):
     def artifact_path_exists(self) -> bool:
         return self.artifact_path.exists()
 
+    @cached_property
+    def resolved_dataset_name(self) -> str:
+        dataset_path = self.artifact_path / self.dataset_name
+        if dataset_path.exists() and len(list(dataset_path.iterdir())) > 0:
+            new_dataset_name = f"{self.dataset_name}_{datetime.now().strftime('%m-%d-%Y_%H%M%S')}"
+            logger.info(
+                f"📂 Dataset path {str(dataset_path)!r} already exists. Dataset from this session"
+                f"\n\t\t will be saved to {str(self.artifact_path / new_dataset_name)!r} instead."
+            )
+            return new_dataset_name
+        return self.dataset_name
+
     @property
     def base_dataset_path(self) -> Path:
-        return self.artifact_path / self.dataset_name
+        return self.artifact_path / self.resolved_dataset_name
 
     @property
     def dropped_columns_dataset_path(self) -> Path:
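The new `resolved_dataset_name` keeps a session from clobbering an existing dataset directory: if the target exists and is non-empty, a timestamp suffix is appended, and `cached_property` ensures the name is resolved once per ArtifactStorage instance. The core rule as a standalone sketch (function name is illustrative):

```python
from datetime import datetime
from pathlib import Path

def resolve_dataset_dir(artifact_path: Path, dataset_name: str) -> Path:
    """Pick a dataset directory, appending a timestamp if the name is already taken."""
    dataset_path = artifact_path / dataset_name
    if dataset_path.exists() and any(dataset_path.iterdir()):
        # Non-empty directory exists: write this session's data elsewhere.
        stamped = f"{dataset_name}_{datetime.now().strftime('%m-%d-%Y_%H%M%S')}"
        return artifact_path / stamped
    return dataset_path
```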
data_designer/engine/dataset_builders/column_wise_builder.py
CHANGED
@@ -88,8 +88,8 @@ class ColumnWiseDatasetBuilder:
         start_time = time.perf_counter()
 
         self.batch_manager.start(num_records=num_records, buffer_size=buffer_size)
-        for batch_idx in range(
-            logger.info(f"⏳ Processing batch {batch_idx} of {self.batch_manager.num_batches}")
+        for batch_idx in range(self.batch_manager.num_batches):
+            logger.info(f"⏳ Processing batch {batch_idx + 1} of {self.batch_manager.num_batches}")
             self._run_batch(generators)
             df_batch = self._run_processors(
                 stage=BuildStage.POST_BATCH,
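The logging change makes batch progress 1-based, so the final message reads "N of N" rather than "N-1 of N". Trivially:

```python
num_batches = 3
for batch_idx in range(num_batches):
    print(f"Processing batch {batch_idx + 1} of {num_batches}")
# Processing batch 1 of 3 ... Processing batch 3 of 3
```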
data_designer/engine/sampling_gen/entities/dataset_based_person_fields.py
CHANGED
@@ -14,6 +14,7 @@ REQUIRED_FIELDS = {"first_name", "last_name", "age", "locale"}
 
 
 PII_FIELDS = [
+    # Core demographic fields
     "uuid",
     "first_name",
     "middle_name",
@@ -22,25 +23,38 @@ PII_FIELDS = [
     "age",
     "birth_date",
     "marital_status",
-    "street_name",
-    "street_number",
-    "unit",
     "postcode",
-    "region",
     "city",
-    "
+    "region",
     "country",
-    "
-    "zone",
+    "locale",
     "bachelors_field",
-    "education_degree",
     "education_level",
     "occupation",
-    "
+    "national_id",
+    # US-specific fields
+    "street_name",
+    "street_number",
+    "unit",
+    "state",
+    "email_address",
+    "phone_number",
+    # Japan-specific fields
+    "area",
+    "prefecture",
+    "zone",
+    # India-specific fields
+    "district",
+    "religion",
+    "education_degree",
+    "first_language",
+    "second_language",
+    "third_language",
 ]
 
 
 PERSONA_FIELDS = [
+    # Core persona fields
     "persona",
     "career_goals_and_ambitions",
     "arts_persona",
@@ -61,4 +75,12 @@ PERSONA_FIELDS = [
     "extraversion",
     "agreeableness",
     "neuroticism",
+    # Japan-specific persona fields
+    "aspects",
+    "digital_skills",
+    # India-specific persona fields
+    "linguistic_persona",
+    "religious_persona",
+    "linguistic_background",
+    "religious_background",
 ]
data_designer/interface/data_designer.py
CHANGED
@@ -9,8 +9,8 @@ import pandas as pd
 from data_designer.config.analysis.dataset_profiler import DatasetProfilerResults
 from data_designer.config.config_builder import DataDesignerConfigBuilder
 from data_designer.config.default_model_settings import (
-    get_defaul_model_providers_missing_api_keys,
     get_default_model_configs,
+    get_default_model_providers_missing_api_keys,
     get_default_provider_name,
     get_default_providers,
     resolve_seed_default_model_settings,
@@ -173,7 +173,11 @@ class DataDesigner(DataDesignerInterface[DatasetCreationResults]):
                 configuration (columns, constraints, seed data, etc.).
             num_records: Number of records to generate.
             dataset_name: Name of the dataset. This name will be used as the dataset
-                folder name in the artifact path directory.
+                folder name in the artifact path directory. If a non-empty directory with the
+                same name already exists, the dataset will be saved to a new directory with
+                a datetime stamp. For example, if the dataset name is "awesome_dataset" and a
+                directory with that name already exists, the dataset will be saved to a new
+                directory named "awesome_dataset_2025-01-01_12-00-00".
 
         Returns:
             DatasetCreationResults object with methods for loading the generated dataset,
@@ -313,7 +317,7 @@ class DataDesigner(DataDesignerInterface[DatasetCreationResults]):
         if model_providers is None:
             if can_run_data_designer_locally():
                 model_providers = get_default_providers()
-                missing_api_keys = get_defaul_model_providers_missing_api_keys()
+                missing_api_keys = get_default_model_providers_missing_api_keys()
                 if len(missing_api_keys) == len(PREDEFINED_PROVIDERS):
                     logger.warning(
                         "🚨 You are trying to use a default model provider but your API keys are missing."
{data_designer-0.1.1.dist-info → data_designer-0.1.3.dist-info}/METADATA
CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: data-designer
-Version: 0.1.1
+Version: 0.1.3
 Summary: General framework for synthetic data generation
 License-Expression: Apache-2.0
 License-File: LICENSE
@@ -97,8 +97,7 @@ export NVIDIA_API_KEY="your-api-key-here"
 export OPENAI_API_KEY="your-openai-api-key-here"
 ```
 
-### 3.
-
+### 3. Start generating data!
 ```python
 from data_designer.essentials import (
     CategorySamplerParams,
@@ -139,8 +138,6 @@ preview = data_designer.preview(config_builder=config_builder)
 preview.display_sample_record()
 ```
 
-**That's it!** You've created a dataset.
-
 ---
 
 ## What's next?
@@ -148,7 +145,7 @@ preview.display_sample_record()
 ### 📚 Learn more
 
 - **[Quick Start Guide](https://nvidia-nemo.github.io/DataDesigner/quick-start/)** – Detailed walkthrough with more examples
-- **[Tutorial Notebooks](https://nvidia-nemo.github.io/DataDesigner/notebooks/
+- **[Tutorial Notebooks](https://nvidia-nemo.github.io/DataDesigner/notebooks/)** – Step-by-step interactive tutorials
 - **[Column Types](https://nvidia-nemo.github.io/DataDesigner/concepts/columns/)** – Explore samplers, LLM columns, validators, and more
 - **[Validators](https://nvidia-nemo.github.io/DataDesigner/concepts/validators/)** – Learn how to validate generated data with Python, SQL, and remote validators
 - **[Model Configuration](https://nvidia-nemo.github.io/DataDesigner/models/model-configs/)** – Configure custom models and providers
{data_designer-0.1.1.dist-info → data_designer-0.1.3.dist-info}/RECORD
CHANGED
@@ -1,5 +1,5 @@
 data_designer/__init__.py,sha256=iCeqRnb640RrL2QpA630GY5Ng7JiDt83Vq0DwLnNugU,461
-data_designer/_version.py,sha256=
+data_designer/_version.py,sha256=q5nF98G8SoVeJqaknL0xdyxtv0egsqb0fK06_84Izu8,704
 data_designer/errors.py,sha256=Z4eN9XwzZvGRdBluSNoSqQYkPPzNQIDf0ET_OqWRZh8,179
 data_designer/logging.py,sha256=O6LlQRj4IdkvEEYiMkKfMb_ZDgN1YpkGQUCqcp7nY6w,5354
 data_designer/plugin_manager.py,sha256=jWoo80x0oCiOIJMA43t-vK-_hVv9_xt4WhBcurYoDqw,3098
@@ -31,20 +31,20 @@ data_designer/cli/services/model_service.py,sha256=Fn3c0qMZqFAEqzBr0haLjp-nLKAkk
 data_designer/cli/services/provider_service.py,sha256=pdD2_C4yK0YBabcuan95H86UreZJ5zWFGI3Ue99mXXo,3916
 data_designer/config/__init__.py,sha256=9eG4WHKyrJcNoK4GEz6BCw_E0Ewo9elQoDN4TLMbAog,137
 data_designer/config/base.py,sha256=xCbvwxXKRityWqeGP4zTXVuPHAOoUdpuQr8_t8vY8f8,2423
-data_designer/config/column_configs.py,sha256=
+data_designer/config/column_configs.py,sha256=ixpanQApbn4LUyW7E4IJefXQG6c0eYbGxF-GGwV1xCg,18000
 data_designer/config/column_types.py,sha256=V0Ijwb-asYOX-GQyG9W-X_A-FIbFSajKuus58sG8CSM,6774
 data_designer/config/config_builder.py,sha256=NlAe6cwN6IAE90A8uPLsOdABmmYyUt6UnGYZwgmf_xE,27288
 data_designer/config/data_designer_config.py,sha256=cvIXMVQzYn9vC4GINPz972pDBmt-HrV5dvw1568LVmE,1719
 data_designer/config/dataset_builders.py,sha256=1pNFy_pkQ5lJ6AVZ43AeTuSbz6yC_l7Ndcyp5yaT8hQ,327
-data_designer/config/datastore.py,sha256=
-data_designer/config/default_model_settings.py,sha256=
+data_designer/config/datastore.py,sha256=Ra6MsPCK6Q1Y8JbTQGRrKtyceig1s41ishyKSZoxgno,7572
+data_designer/config/default_model_settings.py,sha256=aMud_RrRStHnDSbwLxU3BnmIu08YtB1-EG6UUY9NedI,4517
 data_designer/config/errors.py,sha256=XneHH6tKHG2sZ71HzmPr7k3UBZ_psnSANknT30n-aa8,449
 data_designer/config/interface.py,sha256=2_tHvxtKAv0C5L7K4ztm-Xa1A-u9Njlwo2drdPa2qmk,1499
 data_designer/config/models.py,sha256=5Cy55BnKYyr-I1UHLUTqZxe6Ca9uVQWpUiwt9X0ZlrU,7521
 data_designer/config/preview_results.py,sha256=H6ETFI6L1TW8MEC9KYsJ1tXGIC5cloCggBCCZd6jiEE,1087
 data_designer/config/processors.py,sha256=qOF_plBoh6UEFNwUpyDgkqIuSDUaSM2S7k-kSAEB5p8,1328
 data_designer/config/sampler_constraints.py,sha256=4JxP-nge5KstqtctJnVg5RLM1w9mA7qFi_BjgTJl9CE,1167
-data_designer/config/sampler_params.py,sha256=
+data_designer/config/sampler_params.py,sha256=W2GGRwzWZ4RlJAjDpyqSoF6bjpYjT7WHIhS3D0GfupE,26574
 data_designer/config/seed.py,sha256=g-iUToYSIFuTv3sbwSG_dF-9RwC8r8AvCD-vS8c_jDg,5487
 data_designer/config/validator_params.py,sha256=sNxFIF2bk_N4jJD-aMH1N5MQynDip08AoMI1ajxtRdc,3909
 data_designer/config/analysis/column_profilers.py,sha256=Qss9gr7oHNcjijW_MMIX9JkFX-V9v5vPwYWCnxLjMDY,2749
@@ -87,8 +87,8 @@ data_designer/engine/column_generators/generators/validation.py,sha256=MbDFXzief
 data_designer/engine/column_generators/utils/errors.py,sha256=ugNwaqnPdrPZI7YnKLbYwFjYUSm0WAzgaVu_u6i5Rc8,365
 data_designer/engine/column_generators/utils/judge_score_factory.py,sha256=JRoaZgRGK24dH0zx7MNGSccK196tQK_l0sbwNkurg7c,2132
 data_designer/engine/column_generators/utils/prompt_renderer.py,sha256=d4tbyPsgmFDikW3nxL5is9RNaajMkoPDCrfkQkxw7rc,4760
-data_designer/engine/dataset_builders/artifact_storage.py,sha256=
-data_designer/engine/dataset_builders/column_wise_builder.py,sha256=
+data_designer/engine/dataset_builders/artifact_storage.py,sha256=0hpjJ4s3kQ3h-cEpgtIcDpx3UIEMH1FNX5Sp_8yRU9s,7995
+data_designer/engine/dataset_builders/column_wise_builder.py,sha256=bXaFhFD0GsY-9b_GLXY345N0BH5z2YjiWrs_yFDqYgA,13074
 data_designer/engine/dataset_builders/errors.py,sha256=1kChleChG4rASWIiL4Bel6Ox6aFZjQUrh5ogPt1CDWo,359
 data_designer/engine/dataset_builders/multi_column_configs.py,sha256=t28fhI-WRIBohFnAJ80l5EAETEDB5rJ5RSWInMiRfyE,1619
 data_designer/engine/dataset_builders/utils/__init__.py,sha256=9eG4WHKyrJcNoK4GEz6BCw_E0Ewo9elQoDN4TLMbAog,137
@@ -148,7 +148,7 @@ data_designer/engine/sampling_gen/data_sources/base.py,sha256=BRU9pzDvgB5B1Mgtj8
 data_designer/engine/sampling_gen/data_sources/errors.py,sha256=5pq42e5yvUqaH-g09jWvJolYCO2I2Rdrqo1O0gwet8Y,326
 data_designer/engine/sampling_gen/data_sources/sources.py,sha256=63YaRau37NIc2TDn8JvTOsd0zfnY4_aaF9UOU5ryKSo,13387
 data_designer/engine/sampling_gen/entities/__init__.py,sha256=9eG4WHKyrJcNoK4GEz6BCw_E0Ewo9elQoDN4TLMbAog,137
-data_designer/engine/sampling_gen/entities/dataset_based_person_fields.py,sha256
+data_designer/engine/sampling_gen/entities/dataset_based_person_fields.py,sha256=W_QSYNO2ynsXGJ71y_M9uRpYjjcbcAFhp1MpDFdl9YM,1844
 data_designer/engine/sampling_gen/entities/email_address_utils.py,sha256=-V4zuuFq1t3nzzO_FqzCWApPcWNKAh-ZQYFMmCiu5RE,5231
 data_designer/engine/sampling_gen/entities/errors.py,sha256=QEq-6Ld9OlModEYbse0pvY21OC5CyO-OalrL03-iLME,311
 data_designer/engine/sampling_gen/entities/national_id_utils.py,sha256=vxxHnrfQP98W8dWGysCjvfIT-h1xEGdfxn5xF_-UeXw,2611
@@ -163,15 +163,15 @@ data_designer/engine/validators/remote.py,sha256=jtDIvWzfHh17m2ac_Fp93p49Th8RlkB
 data_designer/engine/validators/sql.py,sha256=bxbyxPxDT9yuwjhABVEY40iR1pzWRFi65WU4tPgG2bE,2250
 data_designer/essentials/__init__.py,sha256=zrDZ7hahOmOhCPdfoj0z9ALN10lXIesfwd2qXRqTcdY,4125
 data_designer/interface/__init__.py,sha256=9eG4WHKyrJcNoK4GEz6BCw_E0Ewo9elQoDN4TLMbAog,137
-data_designer/interface/data_designer.py,sha256=
+data_designer/interface/data_designer.py,sha256=USPTruC5axBJNEWEnYBJ4ol2d3mXGubHELBmWeahFe8,16664
 data_designer/interface/errors.py,sha256=jagKT3tPUnYq4e3e6AkTnBkcayHyEfxjPMBzx-GEKe4,565
 data_designer/interface/results.py,sha256=qFxa8SuCXeADiRpaCMBwJcExkJBCfUPeGCdcJSTjoTc,2111
 data_designer/plugins/__init__.py,sha256=c_V7q4QhfVoNf_uc9UwmXCsWqwtyWogI7YoN_0PzzE4,234
 data_designer/plugins/errors.py,sha256=yPIHpSddEr-o9ZcNVibb2hI-73O15Kg_Od8SlmQlnRs,297
 data_designer/plugins/plugin.py,sha256=7ErdUyrTdOb5PCBE3msdhTOrvQpldjOQw90-Bu4Bosc,2522
 data_designer/plugins/registry.py,sha256=iPDTh4duV1cKt7H1fXkj1bKLG6SyUKmzQ9xh-vjEoaM,3018
-data_designer-0.1.1.dist-info/METADATA,sha256=
-data_designer-0.1.1.dist-info/WHEEL,sha256=
-data_designer-0.1.1.dist-info/entry_points.txt,sha256=
-data_designer-0.1.1.dist-info/licenses/LICENSE,sha256=
-data_designer-0.1.1.dist-info/RECORD,,
+data_designer-0.1.3.dist-info/METADATA,sha256=fCI36BVPIOC7FVxQviBmzWMX8HRnc69afkJ82xPYXbY,6644
+data_designer-0.1.3.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
+data_designer-0.1.3.dist-info/entry_points.txt,sha256=NWWWidyDxN6CYX6y664PhBYMhbaYTQTyprqfYAgkyCg,57
+data_designer-0.1.3.dist-info/licenses/LICENSE,sha256=cSWJDwVqHyQgly8Zmt3pqXJ2eQbZVYwN9qd0NMssxXY,11336
+data_designer-0.1.3.dist-info/RECORD,,
{data_designer-0.1.1.dist-info → data_designer-0.1.3.dist-info}/entry_points.txt
File without changes
{data_designer-0.1.1.dist-info → data_designer-0.1.3.dist-info}/licenses/LICENSE
File without changes