data-designer: data_designer-0.1.3-py3-none-any.whl → data_designer-0.1.5-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (72)
  1. data_designer/_version.py +2 -2
  2. data_designer/config/analysis/column_profilers.py +4 -4
  3. data_designer/config/analysis/column_statistics.py +5 -5
  4. data_designer/config/analysis/dataset_profiler.py +6 -6
  5. data_designer/config/analysis/utils/errors.py +1 -1
  6. data_designer/config/analysis/utils/reporting.py +5 -5
  7. data_designer/config/base.py +2 -2
  8. data_designer/config/column_configs.py +8 -8
  9. data_designer/config/column_types.py +9 -5
  10. data_designer/config/config_builder.py +32 -27
  11. data_designer/config/data_designer_config.py +7 -7
  12. data_designer/config/datastore.py +4 -4
  13. data_designer/config/default_model_settings.py +4 -4
  14. data_designer/config/errors.py +1 -1
  15. data_designer/config/exports.py +133 -0
  16. data_designer/config/interface.py +6 -6
  17. data_designer/config/models.py +109 -5
  18. data_designer/config/preview_results.py +9 -6
  19. data_designer/config/processors.py +48 -4
  20. data_designer/config/sampler_constraints.py +1 -1
  21. data_designer/config/sampler_params.py +2 -2
  22. data_designer/config/seed.py +3 -3
  23. data_designer/config/utils/constants.py +1 -1
  24. data_designer/config/utils/errors.py +1 -1
  25. data_designer/config/utils/info.py +8 -4
  26. data_designer/config/utils/io_helpers.py +5 -5
  27. data_designer/config/utils/misc.py +3 -3
  28. data_designer/config/utils/numerical_helpers.py +1 -1
  29. data_designer/config/utils/type_helpers.py +7 -3
  30. data_designer/config/utils/validation.py +37 -6
  31. data_designer/config/utils/visualization.py +42 -10
  32. data_designer/config/validator_params.py +2 -2
  33. data_designer/engine/analysis/column_profilers/base.py +1 -1
  34. data_designer/engine/analysis/dataset_profiler.py +1 -1
  35. data_designer/engine/analysis/utils/judge_score_processing.py +1 -1
  36. data_designer/engine/column_generators/generators/samplers.py +1 -1
  37. data_designer/engine/dataset_builders/artifact_storage.py +16 -6
  38. data_designer/engine/dataset_builders/column_wise_builder.py +4 -1
  39. data_designer/engine/dataset_builders/utils/concurrency.py +1 -1
  40. data_designer/engine/dataset_builders/utils/dataset_batch_manager.py +1 -1
  41. data_designer/engine/errors.py +1 -1
  42. data_designer/engine/models/errors.py +1 -1
  43. data_designer/engine/models/facade.py +1 -1
  44. data_designer/engine/models/parsers/parser.py +2 -2
  45. data_designer/engine/models/recipes/response_recipes.py +1 -1
  46. data_designer/engine/processing/ginja/environment.py +1 -1
  47. data_designer/engine/processing/gsonschema/validators.py +1 -1
  48. data_designer/engine/processing/processors/drop_columns.py +1 -1
  49. data_designer/engine/processing/processors/registry.py +3 -0
  50. data_designer/engine/processing/processors/schema_transform.py +53 -0
  51. data_designer/engine/resources/managed_dataset_repository.py +4 -4
  52. data_designer/engine/resources/managed_storage.py +1 -1
  53. data_designer/engine/sampling_gen/constraints.py +1 -1
  54. data_designer/engine/sampling_gen/data_sources/base.py +1 -1
  55. data_designer/engine/sampling_gen/entities/email_address_utils.py +1 -1
  56. data_designer/engine/sampling_gen/entities/national_id_utils.py +1 -1
  57. data_designer/engine/sampling_gen/entities/person.py +1 -1
  58. data_designer/engine/sampling_gen/entities/phone_number.py +1 -1
  59. data_designer/engine/sampling_gen/people_gen.py +3 -3
  60. data_designer/engine/secret_resolver.py +1 -1
  61. data_designer/engine/validators/python.py +2 -2
  62. data_designer/essentials/__init__.py +20 -128
  63. data_designer/interface/data_designer.py +23 -19
  64. data_designer/interface/results.py +36 -0
  65. data_designer/logging.py +2 -2
  66. data_designer/plugin_manager.py +14 -26
  67. data_designer/plugins/registry.py +1 -1
  68. {data_designer-0.1.3.dist-info → data_designer-0.1.5.dist-info}/METADATA +9 -9
  69. {data_designer-0.1.3.dist-info → data_designer-0.1.5.dist-info}/RECORD +72 -70
  70. {data_designer-0.1.3.dist-info → data_designer-0.1.5.dist-info}/WHEEL +0 -0
  71. {data_designer-0.1.3.dist-info → data_designer-0.1.5.dist-info}/entry_points.txt +0 -0
  72. {data_designer-0.1.3.dist-info → data_designer-0.1.5.dist-info}/licenses/LICENSE +0 -0
@@ -1,137 +1,29 @@
1
1
  # SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
2
  # SPDX-License-Identifier: Apache-2.0
3
- from ..logging import LoggingConfig, configure_logging
3
+
4
+ from data_designer.config.default_model_settings import resolve_seed_default_model_settings
5
+ from data_designer.config.exports import * # noqa: F403
6
+ from data_designer.config.validator_params import LocalCallableValidatorParams
7
+ from data_designer.interface.data_designer import DataDesigner
8
+ from data_designer.logging import LoggingConfig, configure_logging
4
9
 
5
10
  configure_logging(LoggingConfig.default())
6
11
 
7
- from ..config.analysis.column_profilers import JudgeScoreProfilerConfig
8
- from ..config.column_configs import (
9
- ExpressionColumnConfig,
10
- LLMCodeColumnConfig,
11
- LLMJudgeColumnConfig,
12
- LLMStructuredColumnConfig,
13
- LLMTextColumnConfig,
14
- SamplerColumnConfig,
15
- Score,
16
- SeedDatasetColumnConfig,
17
- ValidationColumnConfig,
18
- )
19
- from ..config.column_types import DataDesignerColumnType
20
- from ..config.config_builder import DataDesignerConfigBuilder
21
- from ..config.data_designer_config import DataDesignerConfig
22
- from ..config.dataset_builders import BuildStage
23
- from ..config.datastore import DatastoreSettings
24
- from ..config.models import (
25
- ImageContext,
26
- ImageFormat,
27
- InferenceParameters,
28
- ManualDistribution,
29
- ManualDistributionParams,
30
- Modality,
31
- ModalityContext,
32
- ModalityDataType,
33
- ModelConfig,
34
- UniformDistribution,
35
- UniformDistributionParams,
36
- )
37
- from ..config.processors import DropColumnsProcessorConfig, ProcessorType
38
- from ..config.sampler_constraints import ColumnInequalityConstraint, ScalarInequalityConstraint
39
- from ..config.sampler_params import (
40
- BernoulliMixtureSamplerParams,
41
- BernoulliSamplerParams,
42
- BinomialSamplerParams,
43
- CategorySamplerParams,
44
- DatetimeSamplerParams,
45
- GaussianSamplerParams,
46
- PersonFromFakerSamplerParams,
47
- PersonSamplerParams,
48
- PoissonSamplerParams,
49
- SamplerType,
50
- ScipySamplerParams,
51
- SubcategorySamplerParams,
52
- TimeDeltaSamplerParams,
53
- UniformSamplerParams,
54
- UUIDSamplerParams,
55
- )
56
- from ..config.seed import DatastoreSeedDatasetReference, IndexRange, PartitionBlock, SamplingStrategy, SeedConfig
57
- from ..config.utils.code_lang import CodeLang
58
- from ..config.utils.info import InfoType
59
- from ..config.utils.misc import can_run_data_designer_locally
60
- from ..config.validator_params import (
61
- CodeValidatorParams,
62
- RemoteValidatorParams,
63
- ValidatorType,
64
- )
12
+ # Resolve default model settings on import to ensure they are available when the library is used.
13
+ resolve_seed_default_model_settings()
14
+
65
15
 
66
- local_library_imports = []
67
- try:
68
- if can_run_data_designer_locally():
69
- from ..config.validator_params import LocalCallableValidatorParams # noqa: F401
70
- from ..engine.model_provider import ModelProvider # noqa: F401
71
- from ..interface.data_designer import DataDesigner # noqa: F401
16
+ def get_essentials_exports() -> list[str]:
17
+ logging = [
18
+ configure_logging.__name__,
19
+ LoggingConfig.__name__,
20
+ ]
21
+ local = [
22
+ DataDesigner.__name__,
23
+ LocalCallableValidatorParams.__name__,
24
+ ]
72
25
 
73
- local_library_imports = ["DataDesigner", "LocalCallableValidatorParams", "ModelProvider"]
74
- except ModuleNotFoundError:
75
- pass
26
+ return logging + local + get_config_exports() # noqa: F405
76
27
 
77
- __all__ = [
78
- "BernoulliMixtureSamplerParams",
79
- "BernoulliSamplerParams",
80
- "BinomialSamplerParams",
81
- "CategorySamplerParams",
82
- "CodeLang",
83
- "CodeValidatorParams",
84
- "ColumnInequalityConstraint",
85
- "configure_logging",
86
- "DataDesignerColumnType",
87
- "DataDesignerConfig",
88
- "DataDesignerConfigBuilder",
89
- "BuildStage",
90
- "DatastoreSeedDatasetReference",
91
- "DatastoreSettings",
92
- "DatetimeSamplerParams",
93
- "DropColumnsProcessorConfig",
94
- "ExpressionColumnConfig",
95
- "GaussianSamplerParams",
96
- "IndexRange",
97
- "InfoType",
98
- "ImageContext",
99
- "ImageFormat",
100
- "InferenceParameters",
101
- "JudgeScoreProfilerConfig",
102
- "LLMCodeColumnConfig",
103
- "LLMJudgeColumnConfig",
104
- "LLMStructuredColumnConfig",
105
- "LLMTextColumnConfig",
106
- "LoggingConfig",
107
- "ManualDistribution",
108
- "ManualDistributionParams",
109
- "Modality",
110
- "ModalityContext",
111
- "ModalityDataType",
112
- "ModelConfig",
113
- "PartitionBlock",
114
- "PersonSamplerParams",
115
- "PersonFromFakerSamplerParams",
116
- "PoissonSamplerParams",
117
- "ProcessorType",
118
- "RemoteValidatorParams",
119
- "SamplerColumnConfig",
120
- "SamplerType",
121
- "SamplingStrategy",
122
- "ScalarInequalityConstraint",
123
- "ScipySamplerParams",
124
- "Score",
125
- "SeedConfig",
126
- "SeedDatasetColumnConfig",
127
- "SubcategorySamplerParams",
128
- "TimeDeltaSamplerParams",
129
- "UniformDistribution",
130
- "UniformDistributionParams",
131
- "UniformSamplerParams",
132
- "UUIDSamplerParams",
133
- "ValidationColumnConfig",
134
- "ValidatorType",
135
- ]
136
28
 
137
- __all__.extend(local_library_imports)
29
+ __all__ = get_essentials_exports()
@@ -13,7 +13,6 @@ from data_designer.config.default_model_settings import (
13
13
  get_default_model_providers_missing_api_keys,
14
14
  get_default_provider_name,
15
15
  get_default_providers,
16
- resolve_seed_default_model_settings,
17
16
  )
18
17
  from data_designer.config.interface import DataDesignerInterface
19
18
  from data_designer.config.models import (
@@ -31,7 +30,6 @@ from data_designer.config.utils.constants import (
31
30
  )
32
31
  from data_designer.config.utils.info import InfoType, InterfaceInfo
33
32
  from data_designer.config.utils.io_helpers import write_seed_dataset
34
- from data_designer.config.utils.misc import can_run_data_designer_locally
35
33
  from data_designer.engine.analysis.dataset_profiler import (
36
34
  DataDesignerDatasetProfiler,
37
35
  DatasetProfilerConfig,
@@ -66,11 +64,6 @@ DEFAULT_BUFFER_SIZE = 1000
66
64
  logger = logging.getLogger(__name__)
67
65
 
68
66
 
69
- # Resolve default model settings on import to ensure they are available when the library is used.
70
- if can_run_data_designer_locally():
71
- resolve_seed_default_model_settings()
72
-
73
-
74
67
  class DataDesigner(DataDesignerInterface[DatasetCreationResults]):
75
68
  """Main interface for creating datasets with Data Designer.
76
69
 
@@ -256,6 +249,17 @@ class DataDesigner(DataDesignerInterface[DatasetCreationResults]):
256
249
  except Exception as e:
257
250
  raise DataDesignerProfilingError(f"🛑 Error profiling preview dataset: {e}")
258
251
 
252
+ if builder.artifact_storage.processors_outputs_path.exists():
253
+ processor_artifacts = {
254
+ processor_config.name: pd.read_parquet(
255
+ builder.artifact_storage.processors_outputs_path / f"{processor_config.name}.parquet",
256
+ dtype_backend="pyarrow",
257
+ ).to_dict(orient="records")
258
+ for processor_config in config_builder.get_processor_configs()
259
+ }
260
+ else:
261
+ processor_artifacts = {}
262
+
259
263
  if (
260
264
  len(processed_dataset) > 0
261
265
  and isinstance(analysis, DatasetProfilerResults)
@@ -266,6 +270,7 @@ class DataDesigner(DataDesignerInterface[DatasetCreationResults]):
266
270
  return PreviewResults(
267
271
  dataset=processed_dataset,
268
272
  analysis=analysis,
273
+ processor_artifacts=processor_artifacts,
269
274
  config_builder=config_builder,
270
275
  )
271
276
 
@@ -315,18 +320,17 @@ class DataDesigner(DataDesignerInterface[DatasetCreationResults]):
315
320
 
316
321
  def _resolve_model_providers(self, model_providers: list[ModelProvider] | None) -> list[ModelProvider]:
317
322
  if model_providers is None:
318
- if can_run_data_designer_locally():
319
- model_providers = get_default_providers()
320
- missing_api_keys = get_default_model_providers_missing_api_keys()
321
- if len(missing_api_keys) == len(PREDEFINED_PROVIDERS):
322
- logger.warning(
323
- "🚨 You are trying to use a default model provider but your API keys are missing."
324
- "\n\t\t\tSet the API key for the default providers you intend to use and re-initialize the Data Designer object."
325
- "\n\t\t\tAlternatively, you can provide your own model providers during Data Designer object initialization."
326
- "\n\t\t\tSee https://nvidia-nemo.github.io/DataDesigner/models/model-providers/ for more information."
327
- )
328
- self._get_interface_info(model_providers).display(InfoType.MODEL_PROVIDERS)
329
- return model_providers
323
+ model_providers = get_default_providers()
324
+ missing_api_keys = get_default_model_providers_missing_api_keys()
325
+ if len(missing_api_keys) == len(PREDEFINED_PROVIDERS):
326
+ logger.warning(
327
+ "🚨 You are trying to use a default model provider but your API keys are missing."
328
+ "\n\t\t\tSet the API key for the default providers you intend to use and re-initialize the Data Designer object."
329
+ "\n\t\t\tAlternatively, you can provide your own model providers during Data Designer object initialization."
330
+ "\n\t\t\tSee https://nvidia-nemo.github.io/DataDesigner/concepts/models/model-providers/ for more information."
331
+ )
332
+ self._get_interface_info(model_providers).display(InfoType.MODEL_PROVIDERS)
333
+ return model_providers
330
334
  return model_providers or []
331
335
 
332
336
  def _create_dataset_builder(
@@ -3,12 +3,15 @@
3
3
 
4
4
  from __future__ import annotations
5
5
 
6
+ from pathlib import Path
7
+
6
8
  import pandas as pd
7
9
 
8
10
  from data_designer.config.analysis.dataset_profiler import DatasetProfilerResults
9
11
  from data_designer.config.config_builder import DataDesignerConfigBuilder
10
12
  from data_designer.config.utils.visualization import WithRecordSamplerMixin
11
13
  from data_designer.engine.dataset_builders.artifact_storage import ArtifactStorage
14
+ from data_designer.engine.dataset_builders.errors import ArtifactStorageError
12
15
 
13
16
 
14
17
  class DatasetCreationResults(WithRecordSamplerMixin):
@@ -53,3 +56,36 @@ class DatasetCreationResults(WithRecordSamplerMixin):
53
56
  A pandas DataFrame containing the full generated dataset.
54
57
  """
55
58
  return self.artifact_storage.load_dataset()
59
+
60
+ def load_processor_dataset(self, processor_name: str) -> pd.DataFrame:
61
+ """Load the dataset generated by a processor.
62
+
63
+ This only works for processors that write their artifacts in Parquet format.
64
+
65
+ Args:
66
+ processor_name: The name of the processor to load the dataset from.
67
+
68
+ Returns:
69
+ A pandas DataFrame containing the dataset generated by the processor.
70
+ """
71
+ try:
72
+ dataset = self.artifact_storage.read_parquet_files(
73
+ self.artifact_storage.processors_outputs_path / processor_name
74
+ )
75
+ except Exception as e:
76
+ raise ArtifactStorageError(f"Failed to load dataset for processor {processor_name}: {e}")
77
+
78
+ return dataset
79
+
80
+ def get_path_to_processor_artifacts(self, processor_name: str) -> Path:
81
+ """Get the path to the artifacts generated by a processor.
82
+
83
+ Args:
84
+ processor_name: The name of the processor to load the artifact from.
85
+
86
+ Returns:
87
+ The path to the artifacts.
88
+ """
89
+ if not self.artifact_storage.processors_outputs_path.exists():
90
+ raise ArtifactStorageError(f"Processor {processor_name} has no artifacts.")
91
+ return self.artifact_storage.processors_outputs_path / processor_name
data_designer/logging.py CHANGED
@@ -1,11 +1,11 @@
1
1
  # SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
2
  # SPDX-License-Identifier: Apache-2.0
3
3
 
4
- from dataclasses import dataclass, field
5
4
  import logging
6
- from pathlib import Path
7
5
  import random
8
6
  import sys
7
+ from dataclasses import dataclass, field
8
+ from pathlib import Path
9
9
  from typing import TextIO, Union
10
10
 
11
11
  from pythonjsonlogger import jsonlogger
@@ -6,25 +6,16 @@ from __future__ import annotations
6
6
  from enum import Enum
7
7
  from typing import TYPE_CHECKING, Type, TypeAlias
8
8
 
9
- from .config.utils.misc import can_run_data_designer_locally
9
+ from data_designer.plugins.plugin import PluginType
10
+ from data_designer.plugins.registry import PluginRegistry
10
11
 
11
12
  if TYPE_CHECKING:
12
13
  from data_designer.plugins.plugin import Plugin
13
14
 
14
15
 
15
- if can_run_data_designer_locally():
16
- from data_designer.plugins.plugin import PluginType
17
- from data_designer.plugins.registry import PluginRegistry
18
-
19
-
20
16
  class PluginManager:
21
17
  def __init__(self):
22
- if can_run_data_designer_locally():
23
- self._plugins_supported = True
24
- self._plugin_registry = PluginRegistry()
25
- else:
26
- self._plugins_supported = False
27
- self._plugin_registry = None
18
+ self._plugin_registry = PluginRegistry()
28
19
 
29
20
  def get_column_generator_plugins(self) -> list[Plugin]:
30
21
  """Get all column generator plugins.
@@ -32,7 +23,7 @@ class PluginManager:
32
23
  Returns:
33
24
  A list of all column generator plugins.
34
25
  """
35
- return self._plugin_registry.get_plugins(PluginType.COLUMN_GENERATOR) if self._plugins_supported else []
26
+ return self._plugin_registry.get_plugins(PluginType.COLUMN_GENERATOR)
36
27
 
37
28
  def get_column_generator_plugin_if_exists(self, plugin_name: str) -> Plugin | None:
38
29
  """Get a column generator plugin by name if it exists.
@@ -43,9 +34,8 @@ class PluginManager:
43
34
  Returns:
44
35
  The plugin if found, otherwise None.
45
36
  """
46
- if self._plugins_supported and self._plugin_registry.plugin_exists(plugin_name):
37
+ if self._plugin_registry.plugin_exists(plugin_name):
47
38
  return self._plugin_registry.get_plugin(plugin_name)
48
- return None
49
39
 
50
40
  def get_plugin_column_types(self, enum_type: Type[Enum], required_resources: list[str] | None = None) -> list[Enum]:
51
41
  """Get a list of plugin column types.
@@ -58,13 +48,12 @@ class PluginManager:
58
48
  A list of plugin column types.
59
49
  """
60
50
  type_list = []
61
- if self._plugins_supported:
62
- for plugin in self._plugin_registry.get_plugins(PluginType.COLUMN_GENERATOR):
63
- if required_resources:
64
- task_required_resources = plugin.task_cls.metadata().required_resources or []
65
- if not all(resource in task_required_resources for resource in required_resources):
66
- continue
67
- type_list.append(enum_type(plugin.name))
51
+ for plugin in self._plugin_registry.get_plugins(PluginType.COLUMN_GENERATOR):
52
+ if required_resources:
53
+ task_required_resources = plugin.task_cls.metadata().required_resources or []
54
+ if not all(resource in task_required_resources for resource in required_resources):
55
+ continue
56
+ type_list.append(enum_type(plugin.name))
68
57
  return type_list
69
58
 
70
59
  def inject_into_column_config_type_union(self, column_config_type: Type[TypeAlias]) -> Type[TypeAlias]:
@@ -76,8 +65,7 @@ class PluginManager:
76
65
  Returns:
77
66
  The column config type with plugins injected.
78
67
  """
79
- if self._plugins_supported:
80
- column_config_type = self._plugin_registry.add_plugin_types_to_union(
81
- column_config_type, PluginType.COLUMN_GENERATOR
82
- )
68
+ column_config_type = self._plugin_registry.add_plugin_types_to_union(
69
+ column_config_type, PluginType.COLUMN_GENERATOR
70
+ )
83
71
  return column_config_type
@@ -1,10 +1,10 @@
1
1
  # SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
2
  # SPDX-License-Identifier: Apache-2.0
3
3
 
4
- from importlib.metadata import entry_points
5
4
  import logging
6
5
  import os
7
6
  import threading
7
+ from importlib.metadata import entry_points
8
8
  from typing import Type, TypeAlias
9
9
 
10
10
  from typing_extensions import Self
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: data-designer
3
- Version: 0.1.3
3
+ Version: 0.1.5
4
4
  Summary: General framework for synthetic data generation
5
5
  License-Expression: Apache-2.0
6
6
  License-File: LICENSE
@@ -144,12 +144,12 @@ preview.display_sample_record()
144
144
 
145
145
  ### 📚 Learn more
146
146
 
147
- - **[Quick Start Guide](https://nvidia-nemo.github.io/DataDesigner/quick-start/)** – Detailed walkthrough with more examples
148
- - **[Tutorial Notebooks](https://nvidia-nemo.github.io/DataDesigner/notebooks/)** – Step-by-step interactive tutorials
149
- - **[Column Types](https://nvidia-nemo.github.io/DataDesigner/concepts/columns/)** – Explore samplers, LLM columns, validators, and more
150
- - **[Validators](https://nvidia-nemo.github.io/DataDesigner/concepts/validators/)** – Learn how to validate generated data with Python, SQL, and remote validators
151
- - **[Model Configuration](https://nvidia-nemo.github.io/DataDesigner/models/model-configs/)** – Configure custom models and providers
152
- - **[Person Sampling](https://nvidia-nemo.github.io/DataDesigner/concepts/person_sampling/)** – Learn how to sample realistic person data with demographic attributes
147
+ - **[Quick Start Guide](https://nvidia-nemo.github.io/DataDesigner/latest/quick-start/)** – Detailed walkthrough with more examples
148
+ - **[Tutorial Notebooks](https://nvidia-nemo.github.io/DataDesigner/latest/notebooks/)** – Step-by-step interactive tutorials
149
+ - **[Column Types](https://nvidia-nemo.github.io/DataDesigner/latest/concepts/columns/)** – Explore samplers, LLM columns, validators, and more
150
+ - **[Validators](https://nvidia-nemo.github.io/DataDesigner/latest/concepts/validators/)** – Learn how to validate generated data with Python, SQL, and remote validators
151
+ - **[Model Configuration](https://nvidia-nemo.github.io/DataDesigner/latest/concepts/models/model-configs/)** – Configure custom models and providers
152
+ - **[Person Sampling](https://nvidia-nemo.github.io/DataDesigner/latest/concepts/person_sampling/)** – Learn how to sample realistic person data with demographic attributes
153
153
 
154
154
  ### 🔧 Configure models via CLI
155
155
 
@@ -161,7 +161,7 @@ data-designer config list # View current settings
161
161
 
162
162
  ### 🤝 Get involved
163
163
 
164
- - **[Contributing Guide](https://nvidia-nemo.github.io/DataDesigner/CONTRIBUTING)** – Help improve Data Designer
164
+ - **[Contributing Guide](https://nvidia-nemo.github.io/DataDesigner/latest/CONTRIBUTING)** – Help improve Data Designer
165
165
  - **[GitHub Issues](https://github.com/NVIDIA-NeMo/DataDesigner/issues)** – Report bugs or make a feature request
166
166
 
167
167
  ---
@@ -178,7 +178,7 @@ If you use NeMo Data Designer in your research, please cite it using the followi
178
178
 
179
179
  ```bibtex
180
180
  @misc{nemo-data-designer,
181
- author = {The NeMo Data Designer Team},
181
+ author = {The NeMo Data Designer Team, NVIDIA},
182
182
  title = {NeMo Data Designer: A framework for generating synthetic data from scratch or based on your own seed data},
183
183
  howpublished = {\url{https://github.com/NVIDIA-NeMo/DataDesigner}},
184
184
  year = {2025},