data-designer-engine 0.4.0rc1__py3-none-any.whl → 0.4.0rc2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data_designer/engine/_version.py +2 -2
- data_designer/engine/column_generators/generators/llm_completion.py +3 -3
- data_designer/engine/models/registry.py +5 -0
- data_designer/engine/models/telemetry.py +8 -5
- data_designer/engine/models/utils.py +1 -1
- data_designer/engine/processing/processors/schema_transform.py +27 -5
- {data_designer_engine-0.4.0rc1.dist-info → data_designer_engine-0.4.0rc2.dist-info}/METADATA +1 -1
- {data_designer_engine-0.4.0rc1.dist-info → data_designer_engine-0.4.0rc2.dist-info}/RECORD +9 -9
- {data_designer_engine-0.4.0rc1.dist-info → data_designer_engine-0.4.0rc2.dist-info}/WHEEL +0 -0
data_designer/engine/_version.py
CHANGED
|
@@ -28,7 +28,7 @@ version_tuple: VERSION_TUPLE
|
|
|
28
28
|
commit_id: COMMIT_ID
|
|
29
29
|
__commit_id__: COMMIT_ID
|
|
30
30
|
|
|
31
|
-
__version__ = version = '0.4.0rc1'
|
|
32
|
-
__version_tuple__ = version_tuple = (0, 4, 0, 'rc1')
|
|
31
|
+
__version__ = version = '0.4.0rc2'
|
|
32
|
+
__version_tuple__ = version_tuple = (0, 4, 0, 'rc2')
|
|
33
33
|
|
|
34
34
|
__commit_id__ = commit_id = None
|
|
@@ -62,9 +62,9 @@ class ColumnGeneratorWithModelChatCompletion(ColumnGeneratorWithModel[TaskConfig
|
|
|
62
62
|
|
|
63
63
|
multi_modal_context = None
|
|
64
64
|
if self.config.multi_modal_context is not None and len(self.config.multi_modal_context) > 0:
|
|
65
|
-
multi_modal_context = [
|
|
66
|
-
|
|
67
|
-
|
|
65
|
+
multi_modal_context = []
|
|
66
|
+
for context in self.config.multi_modal_context:
|
|
67
|
+
multi_modal_context.extend(context.get_contexts(deserialized_record))
|
|
68
68
|
|
|
69
69
|
response, reasoning_trace = self.model.generate(
|
|
70
70
|
prompt=self.prompt_renderer.render(
|
|
@@ -107,6 +107,11 @@ class ModelRegistry:
|
|
|
107
107
|
def run_health_check(self, model_aliases: list[str]) -> None:
|
|
108
108
|
logger.info("🩺 Running health checks for models...")
|
|
109
109
|
for model_alias in model_aliases:
|
|
110
|
+
model_config = self.get_model_config(model_alias=model_alias)
|
|
111
|
+
if model_config.skip_health_check:
|
|
112
|
+
logger.info(f" |-- ⏭️ Skipping health check for model alias {model_alias!r} (skip_health_check=True)")
|
|
113
|
+
continue
|
|
114
|
+
|
|
110
115
|
model = self.get_model(model_alias=model_alias)
|
|
111
116
|
logger.info(
|
|
112
117
|
f" |-- 👀 Checking {model.model_name!r} in provider named {model.model_provider_name!r} for model alias {model.model_alias!r}..."
|
|
@@ -8,6 +8,7 @@ Environment variables:
|
|
|
8
8
|
- NEMO_TELEMETRY_ENABLED: Whether telemetry is enabled.
|
|
9
9
|
- NEMO_DEPLOYMENT_TYPE: The deployment type the event came from.
|
|
10
10
|
- NEMO_TELEMETRY_ENDPOINT: The endpoint to send the telemetry events to.
|
|
11
|
+
- NEMO_SESSION_PREFIX: Optional prefix to add to session IDs.
|
|
11
12
|
"""
|
|
12
13
|
|
|
13
14
|
from __future__ import annotations
|
|
@@ -18,15 +19,12 @@ import platform
|
|
|
18
19
|
from dataclasses import dataclass
|
|
19
20
|
from datetime import datetime, timezone
|
|
20
21
|
from enum import Enum
|
|
21
|
-
from typing import TYPE_CHECKING, Any, ClassVar
|
|
22
|
+
from typing import Any, ClassVar
|
|
22
23
|
|
|
23
24
|
from pydantic import BaseModel, Field
|
|
24
25
|
|
|
25
26
|
from data_designer.lazy_heavy_imports import httpx
|
|
26
27
|
|
|
27
|
-
if TYPE_CHECKING:
|
|
28
|
-
import httpx
|
|
29
|
-
|
|
30
28
|
TELEMETRY_ENABLED = os.getenv("NEMO_TELEMETRY_ENABLED", "true").lower() in ("1", "true", "yes")
|
|
31
29
|
CLIENT_ID = "184482118588404"
|
|
32
30
|
NEMO_TELEMETRY_VERSION = "nemo-telemetry/1.0"
|
|
@@ -35,6 +33,7 @@ NEMO_TELEMETRY_ENDPOINT = os.getenv(
|
|
|
35
33
|
"NEMO_TELEMETRY_ENDPOINT", "https://events.telemetry.data.nvidia.com/v1.1/events/json"
|
|
36
34
|
).lower()
|
|
37
35
|
CPU_ARCHITECTURE = platform.uname().machine
|
|
36
|
+
SESSION_PREFIX = os.getenv("NEMO_SESSION_PREFIX")
|
|
38
37
|
|
|
39
38
|
|
|
40
39
|
class NemoSourceEnum(str, Enum):
|
|
@@ -231,7 +230,11 @@ class TelemetryHandler:
|
|
|
231
230
|
self._timer_task: asyncio.Task | None = None
|
|
232
231
|
self._running = False
|
|
233
232
|
self._source_client_version = source_client_version
|
|
234
|
-
|
|
233
|
+
# Apply session prefix if environment variable is set
|
|
234
|
+
if SESSION_PREFIX:
|
|
235
|
+
self._session_id = f"{SESSION_PREFIX}{session_id}"
|
|
236
|
+
else:
|
|
237
|
+
self._session_id = session_id
|
|
235
238
|
|
|
236
239
|
async def astart(self) -> None:
|
|
237
240
|
if self._running:
|
|
@@ -21,9 +21,9 @@ def prompt_to_messages(
|
|
|
21
21
|
user_content = user_prompt
|
|
22
22
|
if multi_modal_context and len(multi_modal_context) > 0:
|
|
23
23
|
user_content = []
|
|
24
|
-
user_content.append({"type": "text", "text": user_prompt})
|
|
25
24
|
for context in multi_modal_context:
|
|
26
25
|
user_content.append(context)
|
|
26
|
+
user_content.append({"type": "text", "text": user_prompt})
|
|
27
27
|
return (
|
|
28
28
|
[
|
|
29
29
|
str_to_message(content=system_prompt, role="system"),
|
|
@@ -5,7 +5,7 @@ from __future__ import annotations
|
|
|
5
5
|
|
|
6
6
|
import json
|
|
7
7
|
import logging
|
|
8
|
-
from typing import TYPE_CHECKING
|
|
8
|
+
from typing import TYPE_CHECKING, Any
|
|
9
9
|
|
|
10
10
|
from data_designer.config.processors import SchemaTransformProcessorConfig
|
|
11
11
|
from data_designer.engine.dataset_builders.artifact_storage import BatchStage
|
|
@@ -20,6 +20,26 @@ if TYPE_CHECKING:
|
|
|
20
20
|
logger = logging.getLogger(__name__)
|
|
21
21
|
|
|
22
22
|
|
|
23
|
+
def _json_escape_record(record: dict[str, Any]) -> dict[str, Any]:
|
|
24
|
+
"""Escape record values for safe insertion into a JSON template."""
|
|
25
|
+
|
|
26
|
+
def escape_for_json_string(s: str) -> str:
|
|
27
|
+
"""Use json.dumps to escape, then strip the surrounding quotes."""
|
|
28
|
+
return json.dumps(s)[1:-1]
|
|
29
|
+
|
|
30
|
+
escaped = {}
|
|
31
|
+
for key, value in record.items():
|
|
32
|
+
if isinstance(value, str):
|
|
33
|
+
escaped[key] = escape_for_json_string(value)
|
|
34
|
+
elif isinstance(value, (dict, list)):
|
|
35
|
+
escaped[key] = escape_for_json_string(json.dumps(value))
|
|
36
|
+
elif value is None:
|
|
37
|
+
escaped[key] = "null"
|
|
38
|
+
else:
|
|
39
|
+
escaped[key] = str(value)
|
|
40
|
+
return escaped
|
|
41
|
+
|
|
42
|
+
|
|
23
43
|
class SchemaTransformProcessor(WithJinja2UserTemplateRendering, Processor[SchemaTransformProcessorConfig]):
|
|
24
44
|
@property
|
|
25
45
|
def template_as_str(self) -> str:
|
|
@@ -27,10 +47,12 @@ class SchemaTransformProcessor(WithJinja2UserTemplateRendering, Processor[Schema
|
|
|
27
47
|
|
|
28
48
|
def process(self, data: pd.DataFrame, *, current_batch_number: int | None = None) -> pd.DataFrame:
|
|
29
49
|
self.prepare_jinja2_template_renderer(self.template_as_str, data.columns.to_list())
|
|
30
|
-
formatted_records = [
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
50
|
+
formatted_records = []
|
|
51
|
+
for record in data.to_dict(orient="records"):
|
|
52
|
+
deserialized = deserialize_json_values(record)
|
|
53
|
+
escaped = _json_escape_record(deserialized)
|
|
54
|
+
rendered = self.render_template(escaped)
|
|
55
|
+
formatted_records.append(json.loads(rendered))
|
|
34
56
|
formatted_data = pd.DataFrame(formatted_records)
|
|
35
57
|
if current_batch_number is not None:
|
|
36
58
|
self.artifact_storage.write_batch_to_parquet_file(
|
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
data_designer/engine/__init__.py,sha256=ObZ6NUPeEvvpGTJ5WIGKUyIrIjaI747OM6ErweRtHxQ,137
|
|
2
|
-
data_designer/engine/_version.py,sha256=
|
|
2
|
+
data_designer/engine/_version.py,sha256=FvItxCBzPigrdVpFPfL1gQeV1-km5r7nCNGUzrYebTU,714
|
|
3
3
|
data_designer/engine/compiler.py,sha256=4QAeCJjINtH0afSXygdhiKMyq2KIfaDthK3ApZLgrQ0,4152
|
|
4
4
|
data_designer/engine/configurable_task.py,sha256=6R4FPXPzIeK0lqNVSEXzRDtK14B3dFz38lplr-nkvRE,2539
|
|
5
5
|
data_designer/engine/errors.py,sha256=YXI7ny83BQ16sOK43CpTm384hJTKuZkPTEAjlHlDIfA,1303
|
|
@@ -20,7 +20,7 @@ data_designer/engine/column_generators/generators/__init__.py,sha256=ObZ6NUPeEvv
|
|
|
20
20
|
data_designer/engine/column_generators/generators/base.py,sha256=QElk5KsaUQ3EYwlv40NcZgQsw3HIkX3YQV_0S3erl7Q,4209
|
|
21
21
|
data_designer/engine/column_generators/generators/embedding.py,sha256=uB0jgHlCgctgIUf9ZfMqG1YThbJ0g-GCX3VdNbdDSko,1407
|
|
22
22
|
data_designer/engine/column_generators/generators/expression.py,sha256=BiQcfVTinvQl3OI9nkdhB9B7FGBueWiHJwxTA8uNVuY,2330
|
|
23
|
-
data_designer/engine/column_generators/generators/llm_completion.py,sha256=
|
|
23
|
+
data_designer/engine/column_generators/generators/llm_completion.py,sha256=udYWE3lwaQhZqxRTHQc6w1kWGEvLAfIh2OUjX6vxMB0,4750
|
|
24
24
|
data_designer/engine/column_generators/generators/samplers.py,sha256=gNzURmu9K8Zb5MHamKvZPIxmWlFgl2W4FIVgaFcy4f0,3371
|
|
25
25
|
data_designer/engine/column_generators/generators/seed_dataset.py,sha256=CoQPbz4Ww7pBLaGw8-CYqIk1sjfkBaoRMKZQexdfgKY,6824
|
|
26
26
|
data_designer/engine/column_generators/generators/validation.py,sha256=YfYbk-8_ZUye0No6_Q7hIqpZv_tunnEZ6HkLSMFXlDE,6659
|
|
@@ -43,10 +43,10 @@ data_designer/engine/models/errors.py,sha256=k9oZnmk8DRD8U2SVKJJRLwrcdsCcVoJiOb_
|
|
|
43
43
|
data_designer/engine/models/facade.py,sha256=UBMpw_o2JcsWpJsPdpTPKfFZCh_i0eeG_oaWi1XeKds,12582
|
|
44
44
|
data_designer/engine/models/factory.py,sha256=2NjI0iiGv8ayQ1c249lsJtha4pDmvmtSjdwvlvitRds,1581
|
|
45
45
|
data_designer/engine/models/litellm_overrides.py,sha256=e9IZCFQ6BhNWlOTncm8ErL8w4rtE1_4USh2mtUYxCZI,6207
|
|
46
|
-
data_designer/engine/models/registry.py,sha256=
|
|
47
|
-
data_designer/engine/models/telemetry.py,sha256=
|
|
46
|
+
data_designer/engine/models/registry.py,sha256=Bid7Mv_ebzbTrlfzN-1wbcFxp_qQwilL0h2iwN5UPJ0,7099
|
|
47
|
+
data_designer/engine/models/telemetry.py,sha256=_VZR6Iatr6-5Hypw3bes5Jr4y7Y3VagxFEVAv36eHcE,12733
|
|
48
48
|
data_designer/engine/models/usage.py,sha256=A0LV9Ycuj_7snOsaqnirs4mlkAjozv2mzj2om2FpDoU,2410
|
|
49
|
-
data_designer/engine/models/utils.py,sha256=
|
|
49
|
+
data_designer/engine/models/utils.py,sha256=sLBs-STJSe7BGzDAngRGGxo6GwAvFmtimqUs54zZ6DU,1259
|
|
50
50
|
data_designer/engine/models/parsers/__init__.py,sha256=ObZ6NUPeEvvpGTJ5WIGKUyIrIjaI747OM6ErweRtHxQ,137
|
|
51
51
|
data_designer/engine/models/parsers/errors.py,sha256=ODcZ4TOsmZyH4-MoNkKXhjiMm_4gLWPsz90qKtNF9_Q,1053
|
|
52
52
|
data_designer/engine/models/parsers/parser.py,sha256=XkdDt2WEnolvsv2bArq4hhujfJ3kLmG6G2jkRXMYA8c,9489
|
|
@@ -69,7 +69,7 @@ data_designer/engine/processing/gsonschema/validators.py,sha256=ui3PzGjIclI6Hlw4
|
|
|
69
69
|
data_designer/engine/processing/processors/base.py,sha256=bkAQO0yK6ATJ3zTwS7F9FXobenJqydCyfijSP2MM-70,472
|
|
70
70
|
data_designer/engine/processing/processors/drop_columns.py,sha256=xT7ym2pQc-R0-YHIuYDQGFn2uAf74309-pV4H878Wlk,1866
|
|
71
71
|
data_designer/engine/processing/processors/registry.py,sha256=ewuFY8QeXpql5CNTZZa_87aYPGPNv1H0hpJR7CBVuzI,1097
|
|
72
|
-
data_designer/engine/processing/processors/schema_transform.py,sha256=
|
|
72
|
+
data_designer/engine/processing/processors/schema_transform.py,sha256=cpN5XAg_YNKpne_Ed3Vhk8_yuoTUiUy_pINgPZF2ASk,2822
|
|
73
73
|
data_designer/engine/registry/base.py,sha256=eACpE7o_c2btiiXrOFJw7o0VvACo7DSqhj8AntkNkCQ,3579
|
|
74
74
|
data_designer/engine/registry/data_designer_registry.py,sha256=mz8ksE49pS1JRVDNubYSxTs0j-8Q6sd08F_dYyTCWSE,1528
|
|
75
75
|
data_designer/engine/registry/errors.py,sha256=k1EaV7egNQwNmRsI8EfymTfeNprcDutPf2M6Vc1nbn8,350
|
|
@@ -108,6 +108,6 @@ data_designer/engine/validators/local_callable.py,sha256=JaL-yOXrTFpubiO2QlSt4Qb
|
|
|
108
108
|
data_designer/engine/validators/python.py,sha256=omXjwMaomQYiyq4g6XqKt2wexVuI_rWue9Dk-CYc-do,8039
|
|
109
109
|
data_designer/engine/validators/remote.py,sha256=rythhIrH2GvqncMQeF3FiJa9Om0KZWeK3cWjW-ZubaM,3077
|
|
110
110
|
data_designer/engine/validators/sql.py,sha256=AMaEdA-gj9j0zwVp809x3ycKltd51wVEhI8mMYGyxd4,2408
|
|
111
|
-
data_designer_engine-0.4.0rc1.dist-info/METADATA,sha256=
|
|
112
|
-
data_designer_engine-0.4.0rc1.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
|
|
113
|
-
data_designer_engine-0.4.0rc1.dist-info/RECORD,,
|
|
111
|
+
data_designer_engine-0.4.0rc2.dist-info/METADATA,sha256=ZChyQl5ksGCWVi_XE6wD-GXG9-wWHko1vBDnd9ecLqw,1876
|
|
112
|
+
data_designer_engine-0.4.0rc2.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
|
|
113
|
+
data_designer_engine-0.4.0rc2.dist-info/RECORD,,
|
|
File without changes
|