PyPI - data-designer - Versions diffs - 0.3.8rc2__py3-none-any.whl → 0.4.0__py3-none-any.whl - Mend

data-designer 0.3.8rc2__py3-none-any.whl → 0.4.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (166) hide show

data_designer/cli/commands/__init__.py +1 -1
data_designer/interface/__init__.py +21 -1
data_designer/{_version.py → interface/_version.py} +2 -2
data_designer/interface/data_designer.py +1 -7
{data_designer-0.3.8rc2.dist-info → data_designer-0.4.0.dist-info}/METADATA +10 -42
data_designer-0.4.0.dist-info/RECORD +39 -0
data_designer/__init__.py +0 -17
data_designer/config/__init__.py +0 -2
data_designer/config/analysis/__init__.py +0 -2
data_designer/config/analysis/column_profilers.py +0 -159
data_designer/config/analysis/column_statistics.py +0 -421
data_designer/config/analysis/dataset_profiler.py +0 -84
data_designer/config/analysis/utils/errors.py +0 -10
data_designer/config/analysis/utils/reporting.py +0 -192
data_designer/config/base.py +0 -69
data_designer/config/column_configs.py +0 -470
data_designer/config/column_types.py +0 -141
data_designer/config/config_builder.py +0 -595
data_designer/config/data_designer_config.py +0 -40
data_designer/config/dataset_builders.py +0 -13
data_designer/config/dataset_metadata.py +0 -18
data_designer/config/default_model_settings.py +0 -129
data_designer/config/errors.py +0 -24
data_designer/config/exports.py +0 -145
data_designer/config/interface.py +0 -55
data_designer/config/models.py +0 -455
data_designer/config/preview_results.py +0 -41
data_designer/config/processors.py +0 -148
data_designer/config/run_config.py +0 -51
data_designer/config/sampler_constraints.py +0 -52
data_designer/config/sampler_params.py +0 -639
data_designer/config/seed.py +0 -116
data_designer/config/seed_source.py +0 -84
data_designer/config/seed_source_types.py +0 -19
data_designer/config/utils/code_lang.py +0 -82
data_designer/config/utils/constants.py +0 -363
data_designer/config/utils/errors.py +0 -21
data_designer/config/utils/info.py +0 -94
data_designer/config/utils/io_helpers.py +0 -258
data_designer/config/utils/misc.py +0 -78
data_designer/config/utils/numerical_helpers.py +0 -30
data_designer/config/utils/type_helpers.py +0 -106
data_designer/config/utils/visualization.py +0 -482
data_designer/config/validator_params.py +0 -94
data_designer/engine/__init__.py +0 -2
data_designer/engine/analysis/column_profilers/base.py +0 -49
data_designer/engine/analysis/column_profilers/judge_score_profiler.py +0 -153
data_designer/engine/analysis/column_profilers/registry.py +0 -22
data_designer/engine/analysis/column_statistics.py +0 -145
data_designer/engine/analysis/dataset_profiler.py +0 -149
data_designer/engine/analysis/errors.py +0 -9
data_designer/engine/analysis/utils/column_statistics_calculations.py +0 -234
data_designer/engine/analysis/utils/judge_score_processing.py +0 -132
data_designer/engine/column_generators/__init__.py +0 -2
data_designer/engine/column_generators/generators/__init__.py +0 -2
data_designer/engine/column_generators/generators/base.py +0 -122
data_designer/engine/column_generators/generators/embedding.py +0 -35
data_designer/engine/column_generators/generators/expression.py +0 -55
data_designer/engine/column_generators/generators/llm_completion.py +0 -113
data_designer/engine/column_generators/generators/samplers.py +0 -69
data_designer/engine/column_generators/generators/seed_dataset.py +0 -144
data_designer/engine/column_generators/generators/validation.py +0 -140
data_designer/engine/column_generators/registry.py +0 -60
data_designer/engine/column_generators/utils/errors.py +0 -15
data_designer/engine/column_generators/utils/generator_classification.py +0 -43
data_designer/engine/column_generators/utils/judge_score_factory.py +0 -58
data_designer/engine/column_generators/utils/prompt_renderer.py +0 -100
data_designer/engine/compiler.py +0 -97
data_designer/engine/configurable_task.py +0 -71
data_designer/engine/dataset_builders/artifact_storage.py +0 -283
data_designer/engine/dataset_builders/column_wise_builder.py +0 -335
data_designer/engine/dataset_builders/errors.py +0 -15
data_designer/engine/dataset_builders/multi_column_configs.py +0 -46
data_designer/engine/dataset_builders/utils/__init__.py +0 -2
data_designer/engine/dataset_builders/utils/concurrency.py +0 -212
data_designer/engine/dataset_builders/utils/config_compiler.py +0 -62
data_designer/engine/dataset_builders/utils/dag.py +0 -62
data_designer/engine/dataset_builders/utils/dataset_batch_manager.py +0 -200
data_designer/engine/dataset_builders/utils/errors.py +0 -15
data_designer/engine/errors.py +0 -51
data_designer/engine/model_provider.py +0 -77
data_designer/engine/models/__init__.py +0 -2
data_designer/engine/models/errors.py +0 -300
data_designer/engine/models/facade.py +0 -287
data_designer/engine/models/factory.py +0 -42
data_designer/engine/models/litellm_overrides.py +0 -179
data_designer/engine/models/parsers/__init__.py +0 -2
data_designer/engine/models/parsers/errors.py +0 -34
data_designer/engine/models/parsers/parser.py +0 -235
data_designer/engine/models/parsers/postprocessors.py +0 -93
data_designer/engine/models/parsers/tag_parsers.py +0 -62
data_designer/engine/models/parsers/types.py +0 -84
data_designer/engine/models/recipes/base.py +0 -81
data_designer/engine/models/recipes/response_recipes.py +0 -293
data_designer/engine/models/registry.py +0 -146
data_designer/engine/models/telemetry.py +0 -359
data_designer/engine/models/usage.py +0 -73
data_designer/engine/models/utils.py +0 -38
data_designer/engine/processing/ginja/__init__.py +0 -2
data_designer/engine/processing/ginja/ast.py +0 -65
data_designer/engine/processing/ginja/environment.py +0 -463
data_designer/engine/processing/ginja/exceptions.py +0 -56
data_designer/engine/processing/ginja/record.py +0 -32
data_designer/engine/processing/gsonschema/__init__.py +0 -2
data_designer/engine/processing/gsonschema/exceptions.py +0 -15
data_designer/engine/processing/gsonschema/schema_transformers.py +0 -83
data_designer/engine/processing/gsonschema/types.py +0 -10
data_designer/engine/processing/gsonschema/validators.py +0 -202
data_designer/engine/processing/processors/base.py +0 -13
data_designer/engine/processing/processors/drop_columns.py +0 -42
data_designer/engine/processing/processors/registry.py +0 -25
data_designer/engine/processing/processors/schema_transform.py +0 -49
data_designer/engine/processing/utils.py +0 -169
data_designer/engine/registry/base.py +0 -99
data_designer/engine/registry/data_designer_registry.py +0 -39
data_designer/engine/registry/errors.py +0 -12
data_designer/engine/resources/managed_dataset_generator.py +0 -39
data_designer/engine/resources/managed_dataset_repository.py +0 -197
data_designer/engine/resources/managed_storage.py +0 -65
data_designer/engine/resources/resource_provider.py +0 -77
data_designer/engine/resources/seed_reader.py +0 -154
data_designer/engine/sampling_gen/column.py +0 -91
data_designer/engine/sampling_gen/constraints.py +0 -100
data_designer/engine/sampling_gen/data_sources/base.py +0 -217
data_designer/engine/sampling_gen/data_sources/errors.py +0 -12
data_designer/engine/sampling_gen/data_sources/sources.py +0 -347
data_designer/engine/sampling_gen/entities/__init__.py +0 -2
data_designer/engine/sampling_gen/entities/assets/zip_area_code_map.parquet +0 -0
data_designer/engine/sampling_gen/entities/dataset_based_person_fields.py +0 -86
data_designer/engine/sampling_gen/entities/email_address_utils.py +0 -171
data_designer/engine/sampling_gen/entities/errors.py +0 -10
data_designer/engine/sampling_gen/entities/national_id_utils.py +0 -102
data_designer/engine/sampling_gen/entities/person.py +0 -144
data_designer/engine/sampling_gen/entities/phone_number.py +0 -128
data_designer/engine/sampling_gen/errors.py +0 -26
data_designer/engine/sampling_gen/generator.py +0 -122
data_designer/engine/sampling_gen/jinja_utils.py +0 -64
data_designer/engine/sampling_gen/people_gen.py +0 -199
data_designer/engine/sampling_gen/person_constants.py +0 -56
data_designer/engine/sampling_gen/schema.py +0 -147
data_designer/engine/sampling_gen/schema_builder.py +0 -61
data_designer/engine/sampling_gen/utils.py +0 -46
data_designer/engine/secret_resolver.py +0 -82
data_designer/engine/validation.py +0 -367
data_designer/engine/validators/__init__.py +0 -19
data_designer/engine/validators/base.py +0 -38
data_designer/engine/validators/local_callable.py +0 -39
data_designer/engine/validators/python.py +0 -254
data_designer/engine/validators/remote.py +0 -89
data_designer/engine/validators/sql.py +0 -65
data_designer/errors.py +0 -7
data_designer/essentials/__init__.py +0 -33
data_designer/lazy_heavy_imports.py +0 -54
data_designer/logging.py +0 -163
data_designer/plugin_manager.py +0 -78
data_designer/plugins/__init__.py +0 -8
data_designer/plugins/errors.py +0 -15
data_designer/plugins/plugin.py +0 -141
data_designer/plugins/registry.py +0 -88
data_designer/plugins/testing/__init__.py +0 -10
data_designer/plugins/testing/stubs.py +0 -116
data_designer/plugins/testing/utils.py +0 -20
data_designer-0.3.8rc2.dist-info/RECORD +0 -196
data_designer-0.3.8rc2.dist-info/licenses/LICENSE +0 -201
{data_designer-0.3.8rc2.dist-info → data_designer-0.4.0.dist-info}/WHEEL +0 -0
{data_designer-0.3.8rc2.dist-info → data_designer-0.4.0.dist-info}/entry_points.txt +0 -0

data_designer/engine/validators/python.py DELETED Viewed

@@ -1,254 +0,0 @@
-# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
-# SPDX-License-Identifier: Apache-2.0
-from __future__ import annotations
-import ast
-import json
-import logging
-import subprocess
-import tempfile
-from collections import defaultdict
-from pathlib import Path
-from typing import TYPE_CHECKING
-from uuid import uuid4
-from pydantic import BaseModel
-from ruff.__main__ import find_ruff_bin
-from data_designer.config.validator_params import CodeValidatorParams
-from data_designer.engine.validators.base import BaseValidator, ValidationOutput, ValidationResult
-from data_designer.lazy_heavy_imports import pd
-if TYPE_CHECKING:
-    import pandas as pd
-logger = logging.getLogger(__name__)
-PYLINT_ERROR_CATEGORIES_ORDERED = [
-    "fatal",
-    "error",
-    "warning",
-    "convention",
-    "refactor",
-]
-PYLINT_VALID_LEVELS = {"none", "warning", "convention", "refactor"}
-TYPE_FROM_SYMBOL = {
-    "E": "refactor",
-    "F": "error",
-    "SIM": "refactor",
-    "PLC": "convention",
-    "PLE": "error",
-    "PLR": "refactor",
-    "PLW": "warning",
-    "SyntaxError": "fatal",
-}
-PYTHON_MESSAGES_FIELD = "python_linter_messages"
-RECORD_ID_COLUMN_NAME = "internal_code_record_id"
-class PythonValidationStat(BaseModel):
-    fatal: int = 0
-    error: int = 0
-    warning: int = 0
-    refactor: int = 0
-    convention: int = 0
-    statement: int = 0
-    @property
-    def score(self) -> float:
-        # https://pylint.pycqa.org/en/latest/user_guide/configuration/all-options.html#evaluation
-        if self.statement == 0:  # prevent division by zero down below
-            self.statement = max(1, self.statement)
-        return max(
-            0,
-            (
-                0
-                if self.fatal
-                else 10.0
-                - ((float(5 * self.error + self.warning + self.refactor + self.convention) / self.statement) * 10)
-            ),
-        )
-class PythonLinterMessage(BaseModel):
-    type: str
-    symbol: str
-    line: int
-    column: int
-    message: str
-    @property
-    def type_sort_order(self) -> int:
-        return PYLINT_ERROR_CATEGORIES_ORDERED.index(self.type)
-class PythonLinterMessages(BaseModel):
-    _messages: list[PythonLinterMessage] = []
-    @property
-    def messages(self) -> list[PythonLinterMessage]:
-        # Ordered by severity first then by line number
-        return sorted(self._messages, key=lambda msg: (msg.type_sort_order, msg.line))
-    def add(self, message: PythonLinterMessage) -> None:
-        self._messages.append(message)
-    def get_count_by_type(self) -> dict[str, int]:
-        count_by_type = defaultdict(int)
-        for message in self.messages:
-            count_by_type[message.type] += 1
-        return dict(count_by_type)
-    @property
-    def is_empty(self) -> bool:
-        return len(self.messages) == 0
-    @property
-    def severity(self) -> str:
-        if self.is_empty:
-            return "none"
-        return self.messages[0].type
-    @property
-    def is_valid(self) -> bool:
-        return self.is_empty or self.messages[0].type in PYLINT_VALID_LEVELS
-class PythonValidator(BaseValidator):
-    def __init__(self, config: CodeValidatorParams):
-        self.config = config
-    def run_validation(self, data: list[dict]) -> ValidationResult:
-        df = pd.DataFrame(data)
-        if len(df.columns) > 1:
-            raise ValueError("Python validator assumes single column input")
-        target_column = df.columns[0]
-        df.loc[:, RECORD_ID_COLUMN_NAME] = [uuid4() for _ in range(df.shape[0])]
-        with tempfile.TemporaryDirectory() as temp_dir:
-            _ = df.apply(
-                self._write_code_to_file,
-                args=(target_column, temp_dir),
-                axis=1,
-            )
-            results = self._validate_files_in_path(path=temp_dir)
-            records = df.to_dict(orient="records")
-            ordered_results = []
-            for record in records:
-                module_id = self._get_module_name(record[RECORD_ID_COLUMN_NAME], target_column)
-                result = results.get(module_id)
-                if result is not None:
-                    ordered_results.append(result)
-        return ValidationResult(data=ordered_results)
-    def _validate_files_in_path(self, path: str) -> dict[str, ValidationOutput]:
-        lint_results = self._run_linter(path)
-        scores_by_module = self._get_scores(
-            {
-                module: messages.get_count_by_type()
-                | {"statement": self._count_python_statements(f"{path}/{module}.py")}
-                for module, messages in lint_results.items()
-            }
-        )
-        validation_result = {}
-        for module, score in scores_by_module.items():
-            messages = lint_results.get(module, PythonLinterMessages())
-            metadata = {
-                "python_linter_score": score,
-                "python_linter_severity": messages.severity,
-                PYTHON_MESSAGES_FIELD: [m.model_dump() for m in messages.messages],
-            }
-            validation_result[module] = ValidationOutput(is_valid=messages.is_valid, **metadata)
-        return validation_result
-    def _write_code_to_file(self, row: pd.Series, code_column: str, path: str) -> None:
-        with open(f"{path}/{self._get_module_name(row[RECORD_ID_COLUMN_NAME], code_column)}.py", "w") as file:
-            file.write(row[code_column])
-    @staticmethod
-    def _get_module_name(record_id: str, column_name: str) -> str:
-        return f"{record_id}_{column_name}"
-    @staticmethod
-    def _run_linter(codebase_path: str) -> dict[str, PythonLinterMessages]:
-        # Create empty dict for output
-        processed = {}
-        for file in Path(codebase_path).glob("*.py"):
-            processed[file.stem] = PythonLinterMessages()
-        # Run ruff linter with JSON output
-        ruff_bin = find_ruff_bin()
-        ruff_exec = subprocess.run(
-            [
-                ruff_bin,
-                "check",
-                "--select",
-                "E,F6,F7,F8,SIM,PLC,PLE,PLR,PLW",
-                "--output-format=json",
-                codebase_path,
-            ],
-            text=True,
-            capture_output=True,
-            check=False,
-            cwd=Path.cwd(),
-        )
-        ruff_output = ruff_exec.stdout
-        # Parse JSON output
-        try:
-            diagnostics = json.loads(ruff_output)
-        except json.JSONDecodeError as e:
-            raise RuntimeError(f"Failed to parse ruff JSON output: {e}")
-        if not diagnostics:
-            return processed  # no errors or warnings
-        for diagnostic in diagnostics:
-            filename = diagnostic["filename"]
-            code = diagnostic["code"]
-            location = diagnostic["location"]
-            message = diagnostic["message"]
-            # Extract alphabetic prefix from code for type mapping
-            alpha_prefix = "".join(c for c in code if c.isalpha())
-            error_type = TYPE_FROM_SYMBOL.get(alpha_prefix, "warning")
-            processed[Path(filename).stem].add(
-                PythonLinterMessage(
-                    type=error_type,
-                    symbol=code,
-                    line=location["row"],
-                    column=location["column"],
-                    message=message,
-                )
-            )
-        return processed
-    @staticmethod
-    def _get_scores(stats_by_module: dict[str, dict[str, int]]) -> dict[str, float]:
-        scores = {}
-        for key, item in stats_by_module.items():
-            stat = PythonValidationStat(**item)
-            scores[key] = stat.score
-        return scores
-    @staticmethod
-    def _count_python_statements(file_path: str) -> int:
-        """Count the number of statements in a Python file."""
-        try:
-            with open(file_path, "r", encoding="utf-8") as f:
-                tree = ast.parse(f.read())
-            return sum(1 for node in ast.walk(tree) if isinstance(node, ast.stmt))
-        except Exception:
-            return 0

data_designer/engine/validators/remote.py DELETED Viewed

@@ -1,89 +0,0 @@
-# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
-# SPDX-License-Identifier: Apache-2.0
-from __future__ import annotations
-import logging
-from typing import TYPE_CHECKING
-from httpx_retries import Retry, RetryTransport
-from data_designer.config.validator_params import RemoteValidatorParams
-from data_designer.engine.errors import RemoteValidationSchemaError
-from data_designer.engine.processing.gsonschema.exceptions import JSONSchemaValidationError
-from data_designer.engine.processing.gsonschema.validators import validate
-from data_designer.engine.validators.base import BaseValidator, ValidationResult
-from data_designer.lazy_heavy_imports import httpx
-if TYPE_CHECKING:
-    import httpx
-logger = logging.getLogger(__name__)
-class RemoteEndpointClient:
-    """Client for making parallel HTTP requests to remote endpoints with retry, timeout, and auth support."""
-    def __init__(
-        self,
-        config: RemoteValidatorParams,
-    ):
-        """
-        Initialize the remote endpoint client.
-        Args:
-            config: Remote validator parameters
-        """
-        self.endpoint_url = config.endpoint_url
-        self.output_schema = config.output_schema
-        self.timeout = config.timeout
-        self.max_retries = config.max_retries
-        self.retry_backoff = config.retry_backoff
-    def post_to_remote_endpoint(self, content: dict) -> dict:
-        """
-        Make a single HTTP request with retry logic.
-        Args:
-            content: The content to be posted to the remote endpoint
-        Returns:
-            The JSON response from the remote endpoint
-        Raises:
-            httpx.RequestError: If all retry attempts fail
-            httpx.HTTPStatusError: If the server returns an error status
-        """
-        retry = Retry(
-            total=self.max_retries,
-            backoff_factor=self.retry_backoff,
-            status_forcelist=[429, 500, 502, 503, 504],
-        )
-        transport = RetryTransport(retry=retry)
-        with httpx.Client(
-            timeout=httpx.Timeout(self.timeout),
-            transport=transport,
-        ) as http_client:
-            response = http_client.post(
-                self.endpoint_url,
-                json=content,
-            )
-            response.raise_for_status()
-            response_json = response.json()
-            if self.output_schema:
-                try:
-                    validate(response_json, self.output_schema, no_extra_properties=True)
-                except JSONSchemaValidationError as exc:
-                    raise RemoteValidationSchemaError(str(exc)) from exc
-            return response_json
-class RemoteValidator(BaseValidator):
-    def __init__(self, config: RemoteValidatorParams):
-        self.remote_endpoint_client = RemoteEndpointClient(config=config)
-    def run_validation(self, data: list[dict]) -> ValidationResult:
-        result = self.remote_endpoint_client.post_to_remote_endpoint(content={"data": data})
-        return ValidationResult.model_validate(result)

data_designer/engine/validators/sql.py DELETED Viewed

@@ -1,65 +0,0 @@
-# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
-# SPDX-License-Identifier: Apache-2.0
-from __future__ import annotations
-import logging
-import re
-from typing import TYPE_CHECKING
-from data_designer.config.utils.code_lang import CodeLang
-from data_designer.config.validator_params import CodeValidatorParams
-from data_designer.engine.validators.base import BaseValidator, ValidationOutput, ValidationResult
-from data_designer.lazy_heavy_imports import pd, sqlfluff
-if TYPE_CHECKING:
-    import pandas as pd
-    import sqlfluff
-sqlfluff_logger = logging.getLogger("sqlfluff")
-sqlfluff_logger.setLevel(logging.WARNING)
-class SQLValidator(BaseValidator):
-    def __init__(self, config: CodeValidatorParams):
-        self.config = config
-    def run_validation(self, data: list[dict]) -> ValidationResult:
-        df = pd.DataFrame(data)
-        if len(df.columns) > 1:
-            raise ValueError("SQL validator assumes single column input")
-        target_column = df.columns[0]
-        records = df.to_dict(orient="records")
-        results = []
-        for record in records:
-            result = self._validate_query(record[target_column])
-            results.append(result)
-        return ValidationResult(data=results)
-    def _validate_query(self, content: str) -> ValidationResult:
-        try:
-            result = sqlfluff.lint(
-                content,
-                dialect=CodeLang.parse_dialect(self.config.code_lang),
-            )
-            prs_errors = [res for res in result if res["code"].startswith("PRS")]
-            error_messages = "\n".join([f"{error['code']}: {error['description']}" for error in prs_errors])
-            decimal_pattern = re.compile(r"DECIMAL\(\d+\)")
-            decimal_issues = decimal_pattern.findall(content)
-            if decimal_issues:
-                error_messages += "\nCustom Check: Found DECIMAL definitions without a scale, which may be incorrect."
-            if error_messages:
-                return ValidationOutput(
-                    is_valid=False,
-                    error_messages=error_messages,
-                )
-            return ValidationOutput(is_valid=True, error_messages="")
-        except Exception as e:
-            return ValidationOutput(
-                is_valid=False,
-                error_messages=f"Exception during SQL parsing: {e}",
-            )

data_designer/errors.py DELETED Viewed

@@ -1,7 +0,0 @@
-# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
-# SPDX-License-Identifier: Apache-2.0
-from __future__ import annotations
-class DataDesignerError(Exception): ...

data_designer/essentials/__init__.py DELETED Viewed

@@ -1,33 +0,0 @@
-# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
-# SPDX-License-Identifier: Apache-2.0
-from __future__ import annotations
-from data_designer.config.default_model_settings import resolve_seed_default_model_settings
-from data_designer.config.exports import *  # noqa: F403
-from data_designer.config.run_config import RunConfig
-from data_designer.config.validator_params import LocalCallableValidatorParams
-from data_designer.interface.data_designer import DataDesigner
-from data_designer.logging import LoggingConfig, configure_logging
-configure_logging(LoggingConfig.default())
-# Resolve default model settings on import to ensure they are available when the library is used.
-resolve_seed_default_model_settings()
-def get_essentials_exports() -> list[str]:
-    logging = [
-        configure_logging.__name__,
-        LoggingConfig.__name__,
-    ]
-    local = [
-        DataDesigner.__name__,
-        LocalCallableValidatorParams.__name__,
-        RunConfig.__name__,
-    ]
-    return logging + local + get_config_exports()  # noqa: F405
-__all__ = get_essentials_exports()

data_designer/lazy_heavy_imports.py DELETED Viewed

@@ -1,54 +0,0 @@
-# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
-# SPDX-License-Identifier: Apache-2.0
-"""
-Lazy imports facade for heavy third-party dependencies.
-This module provides a centralized facade that lazily imports heavy dependencies
-only when accessed, significantly improving import performance.
-Usage:
-    from data_designer.lazy_heavy_imports import pd, np, faker, litellm
-    df = pd.DataFrame(...)
-    arr = np.array([1, 2, 3])
-    fake = faker.Faker()
-"""
-from __future__ import annotations
-import importlib
-# Mapping of lazy import names to their actual module paths
-_LAZY_IMPORTS = {
-    "pd": "pandas",
-    "np": "numpy",
-    "pq": "pyarrow.parquet",
-    "pa": "pyarrow",
-    "faker": "faker",
-    "litellm": "litellm",
-    "sqlfluff": "sqlfluff",
-    "httpx": "httpx",
-    "duckdb": "duckdb",
-    "nx": "networkx",
-    "scipy": "scipy",
-    "jsonschema": "jsonschema",
-}
-def __getattr__(name: str) -> object:
-    """Lazily import heavy third-party dependencies when accessed.
-    This allows fast imports of data_designer while deferring loading of heavy
-    libraries until they're actually needed.
-    """
-    if name in _LAZY_IMPORTS:
-        module_name = _LAZY_IMPORTS[name]
-        return importlib.import_module(module_name)
-    raise AttributeError(f"module 'data_designer.lazy_heavy_imports' has no attribute {name!r}")
-def __dir__() -> list[str]:
-    """Return list of available lazy imports."""
-    return list(_LAZY_IMPORTS.keys())

data_designer/logging.py DELETED Viewed

@@ -1,163 +0,0 @@
-# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
-# SPDX-License-Identifier: Apache-2.0
-from __future__ import annotations
-import logging
-import random
-import sys
-from dataclasses import dataclass, field
-from pathlib import Path
-from typing import TextIO
-from pythonjsonlogger import jsonlogger
-@dataclass
-class LoggerConfig:
-    name: str
-    level: str
-@dataclass
-class OutputConfig:
-    destination: TextIO | Path
-    structured: bool
-@dataclass
-class LoggingConfig:
-    logger_configs: list[LoggerConfig]
-    output_configs: list[OutputConfig]
-    root_level: str = "INFO"
-    to_silence: list[str] = field(default_factory=lambda: _DEFAULT_NOISY_LOGGERS)
-    @classmethod
-    def default(cls):
-        return LoggingConfig(
-            logger_configs=[LoggerConfig(name="data_designer", level="INFO")],
-            output_configs=[OutputConfig(destination=sys.stderr, structured=False)],
-        )
-    @classmethod
-    def debug(cls):
-        return LoggingConfig(
-            logger_configs=[LoggerConfig(name="data_designer", level="DEBUG")],
-            output_configs=[OutputConfig(destination=sys.stderr, structured=False)],
-        )
-class RandomEmoji:
-    """A generator for various themed emoji collections."""
-    @staticmethod
-    def cooking() -> str:
-        """Get a random cooking or food preparation emoji."""
-        return random.choice(["👨‍🍳", "👩‍🍳", "🍳", "🥘", "🍲", "🔪", "🥄", "🍴", "⏲️", "🥗"])
-    @staticmethod
-    def data() -> str:
-        """Get a random data or analytics emoji."""
-        return random.choice(["📊", "📈", "📉", "💾", "💿", "📀", "🗄️", "📁", "📂", "🗃️"])
-    @staticmethod
-    def generating() -> str:
-        """Get a random generating or creating emoji."""
-        return random.choice(["🏭", "⚙️", "🔨", "🛠️", "🏗️", "🎨", "✍️", "📝", "🔧", "⚒️"])
-    @staticmethod
-    def loading() -> str:
-        """Get a random loading or waiting emoji."""
-        return random.choice(["⏳", "⌛", "🔄", "♻️", "🔃", "⏰", "⏱️", "⏲️", "📡", "🌀"])
-    @staticmethod
-    def magic() -> str:
-        """Get a random magical or special effect emoji."""
-        return random.choice(["✨", "⭐", "🌟", "💫", "🪄", "🔮", "🎩", "🌈", "💎", "🦄"])
-    @staticmethod
-    def previewing() -> str:
-        """Get a random previewing or looking ahead emoji."""
-        return random.choice(["👀", "📺", "🔁", "👁️", "🔭", "🕵️", "🧐", "📸", "🎥", "🖼️"])
-    @staticmethod
-    def speed() -> str:
-        """Get a random speed or fast emoji."""
-        return random.choice(["⚡", "💨", "🏃", "🏎️", "🚄", "✈️", "💥", "⏩", "🏃‍♂️", "🏃‍♀️"])
-    @staticmethod
-    def start() -> str:
-        """Get a random emoji representing starting or launching something."""
-        return random.choice(["🚀", "▶️", "🎬", "🌅", "🏁", "🎯", "🚦", "🔔", "📣", "🎺"])
-    @staticmethod
-    def success() -> str:
-        """Get a random success or celebration emoji."""
-        return random.choice(["🎉", "🎊", "👏", "🙌", "🎆", "🍾", "☀️", "🏆", "✅", "🥳"])
-    @staticmethod
-    def thinking() -> str:
-        """Get a random thinking or processing emoji."""
-        return random.choice(["🤔", "💭", "🧠", "💡", "🔍", "🔎", "🤨", "🧐", "📝", "🧮"])
-    @staticmethod
-    def working() -> str:
-        """Get a random working or in-progress emoji."""
-        return random.choice(["⚙️", "🔧", "🔨", "⚒️", "🛠️", "💼", "👷", "🏗️", "🪛", "👨‍💻"])
-def configure_logging(config: LoggingConfig) -> None:
-    root_logger = logging.getLogger()
-    # Remove all handlers
-    root_logger.handlers.clear()
-    # Create and attach handler(s)
-    handlers = [_create_handler(output_config) for output_config in config.output_configs]
-    for handler in handlers:
-        root_logger.addHandler(handler)
-    # Set levels
-    root_logger.setLevel(config.root_level)
-    for logger_config in config.logger_configs:
-        logger = logging.getLogger(logger_config.name)
-        logger.setLevel(logger_config.level)
-    # Adjust noisy loggers
-    for name in config.to_silence:
-        quiet_noisy_logger(name)
-def quiet_noisy_logger(name: str) -> None:
-    logger = logging.getLogger(name)
-    logger.handlers.clear()
-    logger.setLevel(logging.WARNING)
-def _create_handler(output_config: OutputConfig) -> logging.Handler:
-    if isinstance(output_config.destination, Path):
-        handler = logging.FileHandler(str(output_config.destination))
-    else:
-        handler = logging.StreamHandler()
-    if output_config.structured:
-        formatter = _make_json_formatter()
-    else:
-        formatter = _make_stream_formatter()
-    handler.setFormatter(formatter)
-    return handler
-def _make_json_formatter() -> logging.Formatter:
-    log_format = "%(asctime)s %(levelname)s %(name)s %(message)s"
-    return jsonlogger.JsonFormatter(log_format)
-def _make_stream_formatter() -> logging.Formatter:
-    log_format = "[%(asctime)s] [%(levelname)s] %(message)s"
-    time_format = "%H:%M:%S"
-    return logging.Formatter(log_format, time_format)
-_DEFAULT_NOISY_LOGGERS = ["httpx", "matplotlib"]

data-designer 0.3.8rc2__py3-none-any.whl → 0.4.0__py3-none-any.whl

data-designer 0.3.8rc2__py3-none-any.whl → 0.4.0__py3-none-any.whl