data-designer-engine 0.4.0rc1__py3-none-any.whl → 0.4.0rc2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -28,7 +28,7 @@ version_tuple: VERSION_TUPLE
28
28
  commit_id: COMMIT_ID
29
29
  __commit_id__: COMMIT_ID
30
30
 
31
- __version__ = version = '0.4.0rc1'
32
- __version_tuple__ = version_tuple = (0, 4, 0, 'rc1')
31
+ __version__ = version = '0.4.0rc2'
32
+ __version_tuple__ = version_tuple = (0, 4, 0, 'rc2')
33
33
 
34
34
  __commit_id__ = commit_id = None
@@ -62,9 +62,9 @@ class ColumnGeneratorWithModelChatCompletion(ColumnGeneratorWithModel[TaskConfig
62
62
 
63
63
  multi_modal_context = None
64
64
  if self.config.multi_modal_context is not None and len(self.config.multi_modal_context) > 0:
65
- multi_modal_context = [
66
- context.get_context(deserialized_record) for context in self.config.multi_modal_context
67
- ]
65
+ multi_modal_context = []
66
+ for context in self.config.multi_modal_context:
67
+ multi_modal_context.extend(context.get_contexts(deserialized_record))
68
68
 
69
69
  response, reasoning_trace = self.model.generate(
70
70
  prompt=self.prompt_renderer.render(
@@ -107,6 +107,11 @@ class ModelRegistry:
107
107
  def run_health_check(self, model_aliases: list[str]) -> None:
108
108
  logger.info("🩺 Running health checks for models...")
109
109
  for model_alias in model_aliases:
110
+ model_config = self.get_model_config(model_alias=model_alias)
111
+ if model_config.skip_health_check:
112
+ logger.info(f" |-- ⏭️ Skipping health check for model alias {model_alias!r} (skip_health_check=True)")
113
+ continue
114
+
110
115
  model = self.get_model(model_alias=model_alias)
111
116
  logger.info(
112
117
  f" |-- 👀 Checking {model.model_name!r} in provider named {model.model_provider_name!r} for model alias {model.model_alias!r}..."
@@ -8,6 +8,7 @@ Environment variables:
8
8
  - NEMO_TELEMETRY_ENABLED: Whether telemetry is enabled.
9
9
  - NEMO_DEPLOYMENT_TYPE: The deployment type the event came from.
10
10
  - NEMO_TELEMETRY_ENDPOINT: The endpoint to send the telemetry events to.
11
+ - NEMO_SESSION_PREFIX: Optional prefix to add to session IDs.
11
12
  """
12
13
 
13
14
  from __future__ import annotations
@@ -18,15 +19,12 @@ import platform
18
19
  from dataclasses import dataclass
19
20
  from datetime import datetime, timezone
20
21
  from enum import Enum
21
- from typing import TYPE_CHECKING, Any, ClassVar
22
+ from typing import Any, ClassVar
22
23
 
23
24
  from pydantic import BaseModel, Field
24
25
 
25
26
  from data_designer.lazy_heavy_imports import httpx
26
27
 
27
- if TYPE_CHECKING:
28
- import httpx
29
-
30
28
  TELEMETRY_ENABLED = os.getenv("NEMO_TELEMETRY_ENABLED", "true").lower() in ("1", "true", "yes")
31
29
  CLIENT_ID = "184482118588404"
32
30
  NEMO_TELEMETRY_VERSION = "nemo-telemetry/1.0"
@@ -35,6 +33,7 @@ NEMO_TELEMETRY_ENDPOINT = os.getenv(
35
33
  "NEMO_TELEMETRY_ENDPOINT", "https://events.telemetry.data.nvidia.com/v1.1/events/json"
36
34
  ).lower()
37
35
  CPU_ARCHITECTURE = platform.uname().machine
36
+ SESSION_PREFIX = os.getenv("NEMO_SESSION_PREFIX")
38
37
 
39
38
 
40
39
  class NemoSourceEnum(str, Enum):
@@ -231,7 +230,11 @@ class TelemetryHandler:
231
230
  self._timer_task: asyncio.Task | None = None
232
231
  self._running = False
233
232
  self._source_client_version = source_client_version
234
- self._session_id = session_id
233
+ # Apply session prefix if environment variable is set
234
+ if SESSION_PREFIX:
235
+ self._session_id = f"{SESSION_PREFIX}{session_id}"
236
+ else:
237
+ self._session_id = session_id
235
238
 
236
239
  async def astart(self) -> None:
237
240
  if self._running:
@@ -21,9 +21,9 @@ def prompt_to_messages(
21
21
  user_content = user_prompt
22
22
  if multi_modal_context and len(multi_modal_context) > 0:
23
23
  user_content = []
24
- user_content.append({"type": "text", "text": user_prompt})
25
24
  for context in multi_modal_context:
26
25
  user_content.append(context)
26
+ user_content.append({"type": "text", "text": user_prompt})
27
27
  return (
28
28
  [
29
29
  str_to_message(content=system_prompt, role="system"),
@@ -5,7 +5,7 @@ from __future__ import annotations
5
5
 
6
6
  import json
7
7
  import logging
8
- from typing import TYPE_CHECKING
8
+ from typing import TYPE_CHECKING, Any
9
9
 
10
10
  from data_designer.config.processors import SchemaTransformProcessorConfig
11
11
  from data_designer.engine.dataset_builders.artifact_storage import BatchStage
@@ -20,6 +20,26 @@ if TYPE_CHECKING:
20
20
  logger = logging.getLogger(__name__)
21
21
 
22
22
 
23
+ def _json_escape_record(record: dict[str, Any]) -> dict[str, Any]:
24
+ """Escape record values for safe insertion into a JSON template."""
25
+
26
+ def escape_for_json_string(s: str) -> str:
27
+ """Use json.dumps to escape, then strip the surrounding quotes."""
28
+ return json.dumps(s)[1:-1]
29
+
30
+ escaped = {}
31
+ for key, value in record.items():
32
+ if isinstance(value, str):
33
+ escaped[key] = escape_for_json_string(value)
34
+ elif isinstance(value, (dict, list)):
35
+ escaped[key] = escape_for_json_string(json.dumps(value))
36
+ elif value is None:
37
+ escaped[key] = "null"
38
+ else:
39
+ escaped[key] = str(value)
40
+ return escaped
41
+
42
+
23
43
  class SchemaTransformProcessor(WithJinja2UserTemplateRendering, Processor[SchemaTransformProcessorConfig]):
24
44
  @property
25
45
  def template_as_str(self) -> str:
@@ -27,10 +47,12 @@ class SchemaTransformProcessor(WithJinja2UserTemplateRendering, Processor[Schema
27
47
 
28
48
  def process(self, data: pd.DataFrame, *, current_batch_number: int | None = None) -> pd.DataFrame:
29
49
  self.prepare_jinja2_template_renderer(self.template_as_str, data.columns.to_list())
30
- formatted_records = [
31
- json.loads(self.render_template(deserialize_json_values(record)).replace("\n", "\\n"))
32
- for record in data.to_dict(orient="records")
33
- ]
50
+ formatted_records = []
51
+ for record in data.to_dict(orient="records"):
52
+ deserialized = deserialize_json_values(record)
53
+ escaped = _json_escape_record(deserialized)
54
+ rendered = self.render_template(escaped)
55
+ formatted_records.append(json.loads(rendered))
34
56
  formatted_data = pd.DataFrame(formatted_records)
35
57
  if current_batch_number is not None:
36
58
  self.artifact_storage.write_batch_to_parquet_file(
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: data-designer-engine
3
- Version: 0.4.0rc1
3
+ Version: 0.4.0rc2
4
4
  Summary: Generation engine for DataDesigner synthetic data generation
5
5
  License-Expression: Apache-2.0
6
6
  Classifier: Development Status :: 4 - Beta
@@ -1,5 +1,5 @@
1
1
  data_designer/engine/__init__.py,sha256=ObZ6NUPeEvvpGTJ5WIGKUyIrIjaI747OM6ErweRtHxQ,137
2
- data_designer/engine/_version.py,sha256=yib4WPM_pEWXdpIHBdFnf29aurTH5f4xrnwVlv7cijo,714
2
+ data_designer/engine/_version.py,sha256=FvItxCBzPigrdVpFPfL1gQeV1-km5r7nCNGUzrYebTU,714
3
3
  data_designer/engine/compiler.py,sha256=4QAeCJjINtH0afSXygdhiKMyq2KIfaDthK3ApZLgrQ0,4152
4
4
  data_designer/engine/configurable_task.py,sha256=6R4FPXPzIeK0lqNVSEXzRDtK14B3dFz38lplr-nkvRE,2539
5
5
  data_designer/engine/errors.py,sha256=YXI7ny83BQ16sOK43CpTm384hJTKuZkPTEAjlHlDIfA,1303
@@ -20,7 +20,7 @@ data_designer/engine/column_generators/generators/__init__.py,sha256=ObZ6NUPeEvv
20
20
  data_designer/engine/column_generators/generators/base.py,sha256=QElk5KsaUQ3EYwlv40NcZgQsw3HIkX3YQV_0S3erl7Q,4209
21
21
  data_designer/engine/column_generators/generators/embedding.py,sha256=uB0jgHlCgctgIUf9ZfMqG1YThbJ0g-GCX3VdNbdDSko,1407
22
22
  data_designer/engine/column_generators/generators/expression.py,sha256=BiQcfVTinvQl3OI9nkdhB9B7FGBueWiHJwxTA8uNVuY,2330
23
- data_designer/engine/column_generators/generators/llm_completion.py,sha256=3S3ikNLLLGnutUdcuswL5dUfcLgT_-he8DiRZ9K706U,4721
23
+ data_designer/engine/column_generators/generators/llm_completion.py,sha256=udYWE3lwaQhZqxRTHQc6w1kWGEvLAfIh2OUjX6vxMB0,4750
24
24
  data_designer/engine/column_generators/generators/samplers.py,sha256=gNzURmu9K8Zb5MHamKvZPIxmWlFgl2W4FIVgaFcy4f0,3371
25
25
  data_designer/engine/column_generators/generators/seed_dataset.py,sha256=CoQPbz4Ww7pBLaGw8-CYqIk1sjfkBaoRMKZQexdfgKY,6824
26
26
  data_designer/engine/column_generators/generators/validation.py,sha256=YfYbk-8_ZUye0No6_Q7hIqpZv_tunnEZ6HkLSMFXlDE,6659
@@ -43,10 +43,10 @@ data_designer/engine/models/errors.py,sha256=k9oZnmk8DRD8U2SVKJJRLwrcdsCcVoJiOb_
43
43
  data_designer/engine/models/facade.py,sha256=UBMpw_o2JcsWpJsPdpTPKfFZCh_i0eeG_oaWi1XeKds,12582
44
44
  data_designer/engine/models/factory.py,sha256=2NjI0iiGv8ayQ1c249lsJtha4pDmvmtSjdwvlvitRds,1581
45
45
  data_designer/engine/models/litellm_overrides.py,sha256=e9IZCFQ6BhNWlOTncm8ErL8w4rtE1_4USh2mtUYxCZI,6207
46
- data_designer/engine/models/registry.py,sha256=7hZ6TQwwZf259yRZmc3ZI20a4wAo3PCOozPi9Mc5KLo,6827
47
- data_designer/engine/models/telemetry.py,sha256=wmuekvPRZjNz7p7ImKx5H_hqDRhTv_dSB-u2S6Ze3uo,12502
46
+ data_designer/engine/models/registry.py,sha256=Bid7Mv_ebzbTrlfzN-1wbcFxp_qQwilL0h2iwN5UPJ0,7099
47
+ data_designer/engine/models/telemetry.py,sha256=_VZR6Iatr6-5Hypw3bes5Jr4y7Y3VagxFEVAv36eHcE,12733
48
48
  data_designer/engine/models/usage.py,sha256=A0LV9Ycuj_7snOsaqnirs4mlkAjozv2mzj2om2FpDoU,2410
49
- data_designer/engine/models/utils.py,sha256=HS5pXAAz7IcOcijeClC-xxq6R6DUmC2ykZu8Vr33Ivk,1259
49
+ data_designer/engine/models/utils.py,sha256=sLBs-STJSe7BGzDAngRGGxo6GwAvFmtimqUs54zZ6DU,1259
50
50
  data_designer/engine/models/parsers/__init__.py,sha256=ObZ6NUPeEvvpGTJ5WIGKUyIrIjaI747OM6ErweRtHxQ,137
51
51
  data_designer/engine/models/parsers/errors.py,sha256=ODcZ4TOsmZyH4-MoNkKXhjiMm_4gLWPsz90qKtNF9_Q,1053
52
52
  data_designer/engine/models/parsers/parser.py,sha256=XkdDt2WEnolvsv2bArq4hhujfJ3kLmG6G2jkRXMYA8c,9489
@@ -69,7 +69,7 @@ data_designer/engine/processing/gsonschema/validators.py,sha256=ui3PzGjIclI6Hlw4
69
69
  data_designer/engine/processing/processors/base.py,sha256=bkAQO0yK6ATJ3zTwS7F9FXobenJqydCyfijSP2MM-70,472
70
70
  data_designer/engine/processing/processors/drop_columns.py,sha256=xT7ym2pQc-R0-YHIuYDQGFn2uAf74309-pV4H878Wlk,1866
71
71
  data_designer/engine/processing/processors/registry.py,sha256=ewuFY8QeXpql5CNTZZa_87aYPGPNv1H0hpJR7CBVuzI,1097
72
- data_designer/engine/processing/processors/schema_transform.py,sha256=RhLXXKoj9MFpOqsXZ2hfSaTr7_yUUNI3gmFBS4XtEy4,2006
72
+ data_designer/engine/processing/processors/schema_transform.py,sha256=cpN5XAg_YNKpne_Ed3Vhk8_yuoTUiUy_pINgPZF2ASk,2822
73
73
  data_designer/engine/registry/base.py,sha256=eACpE7o_c2btiiXrOFJw7o0VvACo7DSqhj8AntkNkCQ,3579
74
74
  data_designer/engine/registry/data_designer_registry.py,sha256=mz8ksE49pS1JRVDNubYSxTs0j-8Q6sd08F_dYyTCWSE,1528
75
75
  data_designer/engine/registry/errors.py,sha256=k1EaV7egNQwNmRsI8EfymTfeNprcDutPf2M6Vc1nbn8,350
@@ -108,6 +108,6 @@ data_designer/engine/validators/local_callable.py,sha256=JaL-yOXrTFpubiO2QlSt4Qb
108
108
  data_designer/engine/validators/python.py,sha256=omXjwMaomQYiyq4g6XqKt2wexVuI_rWue9Dk-CYc-do,8039
109
109
  data_designer/engine/validators/remote.py,sha256=rythhIrH2GvqncMQeF3FiJa9Om0KZWeK3cWjW-ZubaM,3077
110
110
  data_designer/engine/validators/sql.py,sha256=AMaEdA-gj9j0zwVp809x3ycKltd51wVEhI8mMYGyxd4,2408
111
- data_designer_engine-0.4.0rc1.dist-info/METADATA,sha256=FybLz1fOjJ2bK0zQ93Ti17o7WZTxDFtrBeGx7Oa6jCo,1876
112
- data_designer_engine-0.4.0rc1.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
113
- data_designer_engine-0.4.0rc1.dist-info/RECORD,,
111
+ data_designer_engine-0.4.0rc2.dist-info/METADATA,sha256=ZChyQl5ksGCWVi_XE6wD-GXG9-wWHko1vBDnd9ecLqw,1876
112
+ data_designer_engine-0.4.0rc2.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
113
+ data_designer_engine-0.4.0rc2.dist-info/RECORD,,