data-designer 0.3.8rc1__py3-none-any.whl → 0.3.8rc2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data_designer/_version.py +2 -2
- data_designer/config/default_model_settings.py +14 -6
- data_designer/config/run_config.py +3 -0
- data_designer/engine/dataset_builders/column_wise_builder.py +2 -5
- data_designer/engine/dataset_builders/utils/concurrency.py +0 -3
- data_designer/interface/data_designer.py +8 -5
- {data_designer-0.3.8rc1.dist-info → data_designer-0.3.8rc2.dist-info}/METADATA +1 -1
- {data_designer-0.3.8rc1.dist-info → data_designer-0.3.8rc2.dist-info}/RECORD +11 -11
- {data_designer-0.3.8rc1.dist-info → data_designer-0.3.8rc2.dist-info}/WHEEL +0 -0
- {data_designer-0.3.8rc1.dist-info → data_designer-0.3.8rc2.dist-info}/entry_points.txt +0 -0
- {data_designer-0.3.8rc1.dist-info → data_designer-0.3.8rc2.dist-info}/licenses/LICENSE +0 -0
data_designer/_version.py
CHANGED
|
@@ -28,7 +28,7 @@ version_tuple: VERSION_TUPLE
|
|
|
28
28
|
commit_id: COMMIT_ID
|
|
29
29
|
__commit_id__: COMMIT_ID
|
|
30
30
|
|
|
31
|
-
__version__ = version = '0.3.8rc1'
|
|
32
|
-
__version_tuple__ = version_tuple = (0, 3, 8, 'rc1')
|
|
31
|
+
__version__ = version = '0.3.8rc2'
|
|
32
|
+
__version_tuple__ = version_tuple = (0, 3, 8, 'rc2')
|
|
33
33
|
|
|
34
34
|
__commit_id__ = commit_id = None
|
|
@@ -71,12 +71,20 @@ def get_default_model_configs() -> list[ModelConfig]:
|
|
|
71
71
|
return []
|
|
72
72
|
|
|
73
73
|
|
|
74
|
-
def
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
74
|
+
def get_providers_with_missing_api_keys(providers: list[ModelProvider]) -> list[ModelProvider]:
|
|
75
|
+
providers_with_missing_keys = []
|
|
76
|
+
|
|
77
|
+
for provider in providers:
|
|
78
|
+
if provider.api_key is None:
|
|
79
|
+
# No API key specified at all
|
|
80
|
+
providers_with_missing_keys.append(provider)
|
|
81
|
+
elif provider.api_key.isupper() and "_" in provider.api_key:
|
|
82
|
+
# Looks like an environment variable name, check if it's set
|
|
83
|
+
if os.environ.get(provider.api_key) is None:
|
|
84
|
+
providers_with_missing_keys.append(provider)
|
|
85
|
+
# else: It's an actual API key value (not an env var), so it's valid
|
|
86
|
+
|
|
87
|
+
return providers_with_missing_keys
|
|
80
88
|
|
|
81
89
|
|
|
82
90
|
def get_default_providers() -> list[ModelProvider]:
|
|
@@ -26,6 +26,8 @@ class RunConfig(ConfigBase):
|
|
|
26
26
|
buffer_size: Number of records to process in each batch during dataset generation.
|
|
27
27
|
A batch is processed end-to-end (column generation, post-batch processors, and writing the batch
|
|
28
28
|
to artifact storage) before moving on to the next batch. Must be > 0. Default is 1000.
|
|
29
|
+
non_inference_max_parallel_workers: Maximum number of worker threads used for non-inference
|
|
30
|
+
cell-by-cell generators. Must be >= 1. Default is 4.
|
|
29
31
|
max_conversation_restarts: Maximum number of full conversation restarts permitted when
|
|
30
32
|
generation tasks call `ModelFacade.generate(...)`. Must be >= 0. Default is 5.
|
|
31
33
|
max_conversation_correction_steps: Maximum number of correction rounds permitted within a
|
|
@@ -37,6 +39,7 @@ class RunConfig(ConfigBase):
|
|
|
37
39
|
shutdown_error_rate: float = Field(default=0.5, ge=0.0, le=1.0)
|
|
38
40
|
shutdown_error_window: int = Field(default=10, ge=0)
|
|
39
41
|
buffer_size: int = Field(default=1000, gt=0)
|
|
42
|
+
non_inference_max_parallel_workers: int = Field(default=4, ge=1)
|
|
40
43
|
max_conversation_restarts: int = Field(default=5, ge=0)
|
|
41
44
|
max_conversation_correction_steps: int = Field(default=0, ge=0)
|
|
42
45
|
|
|
@@ -31,10 +31,7 @@ from data_designer.engine.compiler import compile_data_designer_config
|
|
|
31
31
|
from data_designer.engine.dataset_builders.artifact_storage import SDG_CONFIG_FILENAME, ArtifactStorage
|
|
32
32
|
from data_designer.engine.dataset_builders.errors import DatasetGenerationError, DatasetProcessingError
|
|
33
33
|
from data_designer.engine.dataset_builders.multi_column_configs import MultiColumnConfig
|
|
34
|
-
from data_designer.engine.dataset_builders.utils.concurrency import (
|
|
35
|
-
MAX_CONCURRENCY_PER_NON_LLM_GENERATOR,
|
|
36
|
-
ConcurrentThreadExecutor,
|
|
37
|
-
)
|
|
34
|
+
from data_designer.engine.dataset_builders.utils.concurrency import ConcurrentThreadExecutor
|
|
38
35
|
from data_designer.engine.dataset_builders.utils.config_compiler import compile_dataset_builder_column_configs
|
|
39
36
|
from data_designer.engine.dataset_builders.utils.dataset_batch_manager import DatasetBatchManager
|
|
40
37
|
from data_designer.engine.models.telemetry import InferenceEvent, NemoSourceEnum, TaskStatusEnum, TelemetryHandler
|
|
@@ -202,7 +199,7 @@ class ColumnWiseDatasetBuilder:
|
|
|
202
199
|
self.batch_manager.add_records(df.to_dict(orient="records"))
|
|
203
200
|
|
|
204
201
|
def _run_cell_by_cell_generator(self, generator: ColumnGenerator) -> None:
|
|
205
|
-
max_workers = MAX_CONCURRENCY_PER_NON_LLM_GENERATOR
|
|
202
|
+
max_workers = self._resource_provider.run_config.non_inference_max_parallel_workers
|
|
206
203
|
if isinstance(generator, ColumnGeneratorWithModel):
|
|
207
204
|
max_workers = generator.inference_parameters.max_parallel_requests
|
|
208
205
|
self._fan_out_with_threads(generator, max_workers=max_workers)
|
|
@@ -16,9 +16,6 @@ from data_designer.engine.errors import DataDesignerRuntimeError, ErrorTrap
|
|
|
16
16
|
|
|
17
17
|
logger = logging.getLogger(__name__)
|
|
18
18
|
|
|
19
|
-
# Constants
|
|
20
|
-
MAX_CONCURRENCY_PER_NON_LLM_GENERATOR = 4
|
|
21
|
-
|
|
22
19
|
|
|
23
20
|
class ExecutorResults(BaseModel):
|
|
24
21
|
failure_threshold: float = 0.0 # Error rate threshold
|
|
@@ -12,9 +12,9 @@ from data_designer.config.config_builder import DataDesignerConfigBuilder
|
|
|
12
12
|
from data_designer.config.data_designer_config import DataDesignerConfig
|
|
13
13
|
from data_designer.config.default_model_settings import (
|
|
14
14
|
get_default_model_configs,
|
|
15
|
-
get_default_model_providers_missing_api_keys,
|
|
16
15
|
get_default_provider_name,
|
|
17
16
|
get_default_providers,
|
|
17
|
+
get_providers_with_missing_api_keys,
|
|
18
18
|
)
|
|
19
19
|
from data_designer.config.interface import DataDesignerInterface
|
|
20
20
|
from data_designer.config.models import (
|
|
@@ -28,7 +28,6 @@ from data_designer.config.utils.constants import (
|
|
|
28
28
|
MANAGED_ASSETS_PATH,
|
|
29
29
|
MODEL_CONFIGS_FILE_PATH,
|
|
30
30
|
MODEL_PROVIDERS_FILE_PATH,
|
|
31
|
-
PREDEFINED_PROVIDERS,
|
|
32
31
|
)
|
|
33
32
|
from data_designer.config.utils.info import InfoType, InterfaceInfo
|
|
34
33
|
from data_designer.engine.analysis.dataset_profiler import DataDesignerDatasetProfiler, DatasetProfilerConfig
|
|
@@ -317,7 +316,8 @@ class DataDesigner(DataDesignerInterface[DatasetCreationResults]):
|
|
|
317
316
|
|
|
318
317
|
Args:
|
|
319
318
|
run_config: A RunConfig instance containing runtime settings such as
|
|
320
|
-
early shutdown behavior
|
|
319
|
+
early shutdown behavior, batch sizing via `buffer_size`, and non-inference worker
|
|
320
|
+
concurrency via `non_inference_max_parallel_workers`. Import RunConfig from
|
|
321
321
|
data_designer.essentials.
|
|
322
322
|
|
|
323
323
|
Example:
|
|
@@ -334,8 +334,11 @@ class DataDesigner(DataDesignerInterface[DatasetCreationResults]):
|
|
|
334
334
|
def _resolve_model_providers(self, model_providers: list[ModelProvider] | None) -> list[ModelProvider]:
|
|
335
335
|
if model_providers is None:
|
|
336
336
|
model_providers = get_default_providers()
|
|
337
|
-
|
|
338
|
-
|
|
337
|
+
# Check which providers have missing API keys (from YAML file or env vars)
|
|
338
|
+
providers_with_missing_keys = get_providers_with_missing_api_keys(model_providers)
|
|
339
|
+
|
|
340
|
+
if len(providers_with_missing_keys) == len(model_providers):
|
|
341
|
+
# All providers have missing API keys
|
|
339
342
|
logger.warning(
|
|
340
343
|
"🚨 You are trying to use a default model provider but your API keys are missing."
|
|
341
344
|
"\n\t\t\tSet the API key for the default providers you intend to use and re-initialize the Data Designer object."
|
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
data_designer/__init__.py,sha256=iLr6FpW41-DFbGexuXCJ6gN1xBMNUZ2jfj9XxySmQhk,502
|
|
2
|
-
data_designer/_version.py,sha256=
|
|
2
|
+
data_designer/_version.py,sha256=wP4Vt8OKUu98a4RThXFwdZlfYV_E8tf8D24DFaCFjqg,714
|
|
3
3
|
data_designer/errors.py,sha256=r1pBvmvRBAsPmb7oF_veubhkxZ2uPo9cGEDwykLziX4,220
|
|
4
4
|
data_designer/lazy_heavy_imports.py,sha256=wULSEPQRUOZXvOnb0tdf6wNbRBpaaczYfAjY-pstCBM,1512
|
|
5
5
|
data_designer/logging.py,sha256=gRi9BOqm95UC1-u4pn6n-G4EySy9HhwKVyKLRO4aqm4,5382
|
|
@@ -42,14 +42,14 @@ data_designer/config/config_builder.py,sha256=vuPibkodbJxbCXdaI1tt1Uyo1SVCnAOfLB
|
|
|
42
42
|
data_designer/config/data_designer_config.py,sha256=qOojviug05vHR2S4800sjd4OmxhSVi6kB8SAFXLlPog,1891
|
|
43
43
|
data_designer/config/dataset_builders.py,sha256=jdCujJYFlKAiSkPNX2Qeyrs683GrRcCDv_m8ZZhtg64,368
|
|
44
44
|
data_designer/config/dataset_metadata.py,sha256=UTlEgnHWgjwPuc7bP95T7gaKmcr7pIhFMy9vvbUwMV4,647
|
|
45
|
-
data_designer/config/default_model_settings.py,sha256=
|
|
45
|
+
data_designer/config/default_model_settings.py,sha256=c-llH2otfG0tMCMsxoz3ZcS1nFxIQQPfRedFXAydDbc,4868
|
|
46
46
|
data_designer/config/errors.py,sha256=JhvUYecfLmP0gZjQzqA3OmfaSs9TRlC5E-ubnV_-3gs,560
|
|
47
47
|
data_designer/config/exports.py,sha256=lNwteK4djETKXrMKh5PPeHeZvPAZ5RpnJt2otpoaUz0,4756
|
|
48
48
|
data_designer/config/interface.py,sha256=ikmpm_KwencTpM-yg0auo7XMgcmMSa67S75IqdpFLfk,1676
|
|
49
49
|
data_designer/config/models.py,sha256=OekrXEVnI9WdHzEVk-8fO0NtxLZtjKVtCL03RY8qwYs,15457
|
|
50
50
|
data_designer/config/preview_results.py,sha256=WnPlDcHElIHNfjV_P-nLu_Dpul8D3Eyb5qyi3E173Gs,1744
|
|
51
51
|
data_designer/config/processors.py,sha256=lnyUZA1EhO9NWjjVFFioYxSgeYpoAaM1J7UzwOYkvms,6028
|
|
52
|
-
data_designer/config/run_config.py,sha256=
|
|
52
|
+
data_designer/config/run_config.py,sha256=oJ163DpHXu9PzST5Hn9px-bIP9DYjIkCO7UGB93J7bI,2663
|
|
53
53
|
data_designer/config/sampler_constraints.py,sha256=tQI1XLF5bS4TnyKMLo0nArvefnXI8dWCzov38r4qNCQ,1197
|
|
54
54
|
data_designer/config/sampler_params.py,sha256=Gio-53vjSYOdPhF2CEq4HSWCXCaZMy4WpGPbuFVcWOM,27965
|
|
55
55
|
data_designer/config/seed.py,sha256=eShSqOcSUzfCEZBnqY-rB0qZpRGxjeOE3fSaJAwacec,4668
|
|
@@ -101,11 +101,11 @@ data_designer/engine/column_generators/utils/generator_classification.py,sha256=
|
|
|
101
101
|
data_designer/engine/column_generators/utils/judge_score_factory.py,sha256=gESiqMrQzbbcFpZas0sAAAkrH2DL0Z4Nq5ywBO-pQ6k,2141
|
|
102
102
|
data_designer/engine/column_generators/utils/prompt_renderer.py,sha256=LATVAlDYwL7HyM7Nogd6n9XTTk-j9s64o4z0LpKHMhQ,4819
|
|
103
103
|
data_designer/engine/dataset_builders/artifact_storage.py,sha256=CKpTBtJTde7OQvsFZQa1v1autVz5yUxlBHkIKeATFnE,10999
|
|
104
|
-
data_designer/engine/dataset_builders/column_wise_builder.py,sha256=
|
|
104
|
+
data_designer/engine/dataset_builders/column_wise_builder.py,sha256=lzCSk3dFmdZvKLPAVIRNp9oJQsiilthHRW7mB4dUUB4,15716
|
|
105
105
|
data_designer/engine/dataset_builders/errors.py,sha256=gLXtPcGSMBG10PzQ85dOXskdA0mKbBQrHa_VtP9sbVY,400
|
|
106
106
|
data_designer/engine/dataset_builders/multi_column_configs.py,sha256=U4Pg0ETCBq5phRhb2zt8IFa4fRx-aTMakomKOBnrs0U,1660
|
|
107
107
|
data_designer/engine/dataset_builders/utils/__init__.py,sha256=XLO09Ei8g0lU7hYlzKCvhvQhLFBe5CBwE4v2PqK9xWY,142
|
|
108
|
-
data_designer/engine/dataset_builders/utils/concurrency.py,sha256=
|
|
108
|
+
data_designer/engine/dataset_builders/utils/concurrency.py,sha256=Lga_xd8i3ZAPqJlKCB4GHG7uxWxws1m-UGAz9UeqU_8,8283
|
|
109
109
|
data_designer/engine/dataset_builders/utils/config_compiler.py,sha256=NGI6U0vgG88d5YKj7oW_SIJ4-_fhA6VFhPbjqGRHea4,2441
|
|
110
110
|
data_designer/engine/dataset_builders/utils/dag.py,sha256=RIEI75OtiphkuDl1vfI_MQC1xMiiIg29s-0C_fNZkWQ,2613
|
|
111
111
|
data_designer/engine/dataset_builders/utils/dataset_batch_manager.py,sha256=IfWd_HcfEzIPhgFp2dJaxNIKRlrPsHqYATFXauvCfaw,8133
|
|
@@ -179,7 +179,7 @@ data_designer/engine/validators/remote.py,sha256=rythhIrH2GvqncMQeF3FiJa9Om0KZWe
|
|
|
179
179
|
data_designer/engine/validators/sql.py,sha256=AMaEdA-gj9j0zwVp809x3ycKltd51wVEhI8mMYGyxd4,2408
|
|
180
180
|
data_designer/essentials/__init__.py,sha256=dIGYH9s0_VQJ1lG8S-ElZiISz59LHo9v7Y5upizcA1M,1135
|
|
181
181
|
data_designer/interface/__init__.py,sha256=XLO09Ei8g0lU7hYlzKCvhvQhLFBe5CBwE4v2PqK9xWY,142
|
|
182
|
-
data_designer/interface/data_designer.py,sha256=
|
|
182
|
+
data_designer/interface/data_designer.py,sha256=nqsl2Et1wSF3TQzYx3gC1drJzrKlm4FtRE7QJYilbys,17544
|
|
183
183
|
data_designer/interface/errors.py,sha256=Ft9GMeIrOHJv_PC_1rU6hWcNyq1GHdsFYZSc9HnUrxU,606
|
|
184
184
|
data_designer/interface/results.py,sha256=3fGwlhif4ufqUGh-EgsGccrob4S6a7WZ6BgFiszTo_A,3871
|
|
185
185
|
data_designer/plugins/__init__.py,sha256=qe1alcTEtnMSMdzknjb57vvjqKgFE5cEHXxBj8tPWMI,275
|
|
@@ -189,8 +189,8 @@ data_designer/plugins/registry.py,sha256=Cnt33Q25o9bS2v2YDbV3QPM57VNrtIBKAb4ERQR
|
|
|
189
189
|
data_designer/plugins/testing/__init__.py,sha256=yyxrrH_i3q0Xb56QO9Ma35WtHlQ5PJF1b2pQoKa16xU,296
|
|
190
190
|
data_designer/plugins/testing/stubs.py,sha256=9tUF209ayZR6f0Q1LsRDW4kEOTgPoIxV8jlq4QoWuW0,3498
|
|
191
191
|
data_designer/plugins/testing/utils.py,sha256=a9LEgK827cnIzHEkgXOdgywrKDLBE36cyttrpG1ctT4,973
|
|
192
|
-
data_designer-0.3.
|
|
193
|
-
data_designer-0.3.
|
|
194
|
-
data_designer-0.3.
|
|
195
|
-
data_designer-0.3.
|
|
196
|
-
data_designer-0.3.
|
|
192
|
+
data_designer-0.3.8rc2.dist-info/METADATA,sha256=-gMHOKpWpc0HRy8QfQb4KQkEqeYJaHbxlL_l9M1PKmk,8122
|
|
193
|
+
data_designer-0.3.8rc2.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
|
|
194
|
+
data_designer-0.3.8rc2.dist-info/entry_points.txt,sha256=NWWWidyDxN6CYX6y664PhBYMhbaYTQTyprqfYAgkyCg,57
|
|
195
|
+
data_designer-0.3.8rc2.dist-info/licenses/LICENSE,sha256=cSWJDwVqHyQgly8Zmt3pqXJ2eQbZVYwN9qd0NMssxXY,11336
|
|
196
|
+
data_designer-0.3.8rc2.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|