harmony-client 0.1.0__cp312-cp312-macosx_10_9_universal2.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,78 @@
1
+ # ruff: noqa: F403, F401
2
+ from typing import TYPE_CHECKING
3
+
4
+ from .harmony_client import (
5
+ EvalSample as EvalSample,
6
+ )
7
+ from .harmony_client import (
8
+ EvalSampleInteraction as EvalSampleInteraction,
9
+ )
10
+ from .harmony_client import (
11
+ EvaluationArtifactBase as EvaluationArtifactBase,
12
+ )
13
+ from .harmony_client import (
14
+ Grade as Grade,
15
+ )
16
+ from .harmony_client import (
17
+ HarmonyClient as HarmonyClient,
18
+ )
19
+ from .harmony_client import (
20
+ HarmonyJobNotifier as HarmonyJobNotifier,
21
+ )
22
+ from .harmony_client import (
23
+ InferenceModel as InferenceModel,
24
+ )
25
+ from .harmony_client import (
26
+ JobArtifact as JobArtifact,
27
+ )
28
+ from .harmony_client import (
29
+ JobNotifier as JobNotifier,
30
+ )
31
+ from .harmony_client import (
32
+ ModelBuilder as ModelBuilder,
33
+ )
34
+ from .harmony_client import (
35
+ StageNotifier as StageNotifier,
36
+ )
37
+ from .harmony_client import (
38
+ StringThread as StringThread,
39
+ )
40
+ from .harmony_client import (
41
+ TokenizedThread as TokenizedThread,
42
+ )
43
+ from .harmony_client import (
44
+ TrainingModel as TrainingModel,
45
+ )
46
+ from .harmony_client import (
47
+ get_client as get_client,
48
+ )
49
+
50
if TYPE_CHECKING:
    # For static analysis, use the real StringTurn exported by the
    # compiled harmony_client extension module.
    from .harmony_client import StringTurn as StringTurn
else:
    from typing import NamedTuple

    # Runtime fallback: a structurally identical NamedTuple — presumably
    # because the extension module does not expose StringTurn at runtime;
    # verify against the harmony_client extension's exports.
    class StringTurn(NamedTuple):
        role: str
        content: str


# Ensure key classes are available at module level
__all__ = [
    "StringThread",
    "StringTurn",
    "TokenizedThread",
    "InferenceModel",
    "ModelBuilder",
    "TrainingModel",
    "HarmonyClient",
    "get_client",
    "Grade",
    "EvalSample",
    "EvalSampleInteraction",
    "JobArtifact",
    "JobNotifier",
    "HarmonyJobNotifier",
    "StageNotifier",
    "EvaluationArtifactBase",
]
@@ -0,0 +1,5 @@
1
# Public surface of the artifacts subpackage: re-export the three artifact
# classes plus the dataset sample union type.
from harmony_client.artifacts.custom_artifact import CustomArtifact
from harmony_client.artifacts.dataset_artifact import DatasetArtifact, DatasetSampleType
from harmony_client.artifacts.model_artifact import ModelArtifact

__all__ = ["CustomArtifact", "DatasetArtifact", "ModelArtifact", "DatasetSampleType"]
@@ -0,0 +1,46 @@
1
+ import uuid
2
+
3
+ from harmony_client import JobArtifact
4
+ from harmony_client.runtime.context import RecipeContext
5
+
6
+
7
class CustomArtifact:
    """A job artifact of kind "custom", optionally backed by a file in storage."""

    def __init__(self, name: str, ctx: RecipeContext, file: str | None = None) -> None:
        """Create the artifact and register it with the running job.

        Args:
            name: Display name of the artifact.
            ctx: Recipe context providing file storage and the job handle.
            file: Optional file name; when given, the artifact URI points at it.
        """
        storage_uri = None if not file else f"file://artifacts/{file}"
        self._base = JobArtifact(
            id=str(uuid.uuid4()),
            name=name,
            kind="custom",
            uri=storage_uri,
        )
        self.ctx = ctx
        self.ctx.job.register_artifact(self._base)

    @property
    def id(self) -> str:
        """Artifact ID (a random UUID string)."""
        return self._base.id

    @property
    def name(self) -> str:
        """Artifact display name."""
        return self._base.name

    @property
    def kind(self) -> str:
        """Artifact kind; always "custom" for this class."""
        return self._base.kind

    @property
    def uri(self) -> str:
        """Storage URI; only valid when the artifact was created with a file."""
        assert self._base.uri is not None
        return self._base.uri

    def write_file(self, file_path: str) -> None:
        # NOTE(review): passes (file_path, uri) — confirm this matches the
        # FileStorage.write signature (append takes (data, uri)).
        self.ctx.file_storage.write(file_path, self.uri)

    def append_file(self, file_path: str) -> None:
        """Append the bytes of a local file to the artifact's storage URI."""
        with open(file_path, "rb") as fh:
            payload = fh.read()
        self.ctx.file_storage.append(payload, self.uri)

    def read_file(self, file_path: str) -> bytes:
        # NOTE(review): reads `file_path` from storage, not `self.uri` —
        # confirm this is intended (DatasetArtifact reads its own uri).
        return self.ctx.file_storage.read(file_path)

    def __repr__(self):
        return f"CustomArtifact(id={self.id}, name={self.name}, kind={self.kind}, uri={self.uri})"
@@ -0,0 +1,268 @@
1
+ import json
2
+ import logging
3
+ import uuid
4
+ from datetime import datetime
5
+ from typing import List, Self, Sequence
6
+
7
+ from harmony_client import JobArtifact, StringThread
8
+ from harmony_client.runtime.context import RecipeContext
9
+ from harmony_client.runtime.dto.AdaptiveDataset import AdaptiveDatasetKind
10
+ from harmony_client.runtime.dto.DatasetSampleFormats import (
11
+ DatasetMetricSample,
12
+ DatasetPreferenceSample,
13
+ DatasetPromptSample,
14
+ DatasetSample,
15
+ SampleMetadata,
16
+ )
17
+
18
logger = logging.getLogger(__name__)

# Union of every sample schema a DatasetArtifact can hold; used to validate
# `add_samples` inputs against the configured dataset kind.
DatasetSampleType = DatasetSample | DatasetPromptSample | DatasetMetricSample | DatasetPreferenceSample
22
+
23
+
24
+ class DatasetArtifact:
25
+ """
26
+ Artifact for saving dataset samples generated during recipe execution.
27
+
28
+ Supports different dataset kinds (Prompt, Completion, Metric, Preference, Mixed)
29
+ and can save samples in JSONL format compatible with the platform's dataset format.
30
+ """
31
+
32
+ def __init__(self, name: str, ctx: RecipeContext, kind: AdaptiveDatasetKind = AdaptiveDatasetKind.Mixed) -> None:
33
+ """
34
+ Initialize a dataset artifact.
35
+
36
+ Args:
37
+ name: Name of the dataset artifact
38
+ ctx: Recipe context for file storage and job registration
39
+ kind: Type of dataset (Prompt, Completion, Metric, Preference, Mixed)
40
+ """
41
+ artifact_id = str(uuid.uuid4())
42
+ url = ctx.file_storage.mk_url(f"artifacts/dataset_samples_{artifact_id}.jsonl")
43
+
44
+ self._base = JobArtifact(
45
+ id=artifact_id,
46
+ name=name,
47
+ kind="dataset",
48
+ uri=url,
49
+ # Store dataset kind and sample count in metadata
50
+ dataset_kind=kind.value,
51
+ sample_count=0,
52
+ )
53
+ self.ctx = ctx
54
+ self.kind = kind
55
+ self._sample_count = 0
56
+ print(f"Registering artifact: {self._base}")
57
+ # Register artifact with the job
58
+ self.ctx.job.register_artifact(self._base)
59
+
60
+ @property
61
+ def id(self) -> str:
62
+ """Get the artifact ID."""
63
+ return self._base.id
64
+
65
+ @property
66
+ def name(self) -> str:
67
+ """Get the artifact name."""
68
+ return self._base.name
69
+
70
+ @property
71
+ def artifact_kind(self) -> str:
72
+ """Get the artifact kind (always 'dataset')."""
73
+ return self._base.kind
74
+
75
+ @property
76
+ def dataset_kind(self) -> AdaptiveDatasetKind:
77
+ """Get the dataset kind (Prompt, Completion, etc.)."""
78
+ return self.kind
79
+
80
+ @property
81
+ def uri(self) -> str:
82
+ """Get the artifact URI."""
83
+ assert self._base.uri is not None
84
+ return self._base.uri
85
+
86
+ @property
87
+ def sample_count(self) -> int:
88
+ """Get the number of samples added to this artifact."""
89
+ return self._sample_count
90
+
91
+ def add_samples_from_thread(self, threads: List[StringThread]) -> Self:
92
+ """
93
+ Add a dataset sample from a string thread.
94
+ """
95
+ return self.add_samples([self._thread_to_dataset_sample(thread) for thread in threads])
96
+
97
+ def add_samples(self, samples: Sequence[DatasetSampleType]) -> Self:
98
+ """
99
+ Add dataset samples to this artifact.
100
+
101
+ Args:
102
+ samples: List of dataset samples to add
103
+
104
+ Returns:
105
+ Self for method chaining
106
+
107
+ Raises:
108
+ ValueError: If samples list is empty
109
+ TypeError: If sample type doesn't match dataset kind
110
+ Exception: If serialization or storage fails
111
+ """
112
+ if not samples:
113
+ raise ValueError("Cannot add empty samples list")
114
+
115
+ try:
116
+ # Validate samples match the dataset kind (unless Mixed)
117
+ if self.kind != AdaptiveDatasetKind.Mixed:
118
+ self._validate_samples_kind(samples)
119
+
120
+ json_lines = "\n".join([self._sample_to_json(sample) for sample in samples])
121
+ # Convert samples to JSONL format
122
+ self.ctx.file_storage.append((json_lines + "\n").encode("utf-8"), self.uri)
123
+
124
+ self._sample_count += len(samples)
125
+ logger.debug(f"Added {len(samples)} samples to dataset artifact {self.id}")
126
+ except Exception as e:
127
+ logger.error(f"Failed to add samples to dataset artifact {self.id}: {e}")
128
+ raise
129
+
130
+ return self
131
+
132
+ def add_prompt_items(self, items: List[DatasetPromptSample]) -> Self:
133
+ """
134
+ Add prompt-only items to the dataset.
135
+
136
+ Args:
137
+ items: List of DatasetPromptSample objects
138
+
139
+ Returns:
140
+ Self for method chaining
141
+ """
142
+ return self.add_samples(items)
143
+
144
+ def add_completion_items(self, items: List[DatasetSample]) -> Self:
145
+ """
146
+ Add prompt-completion items to the dataset.
147
+
148
+ Args:
149
+ items: List of DatasetSample objects
150
+
151
+ Returns:
152
+ Self for method chaining
153
+ """
154
+ return self.add_samples(items)
155
+
156
+ def add_metric_items(self, items: List[DatasetMetricSample]) -> Self:
157
+ """
158
+ Add items with evaluation metrics to the dataset.
159
+
160
+ Args:
161
+ items: List of DatasetMetricSample objects
162
+
163
+ Returns:
164
+ Self for method chaining
165
+ """
166
+ return self.add_samples(items)
167
+
168
+ def add_preference_items(self, items: List[DatasetPreferenceSample]) -> Self:
169
+ """
170
+ Add preference items (good vs bad completions) to the dataset.
171
+
172
+ Args:
173
+ items: List of DatasetPreferenceSample objects
174
+
175
+ Returns:
176
+ Self for method chaining
177
+ """
178
+ return self.add_samples(items)
179
+
180
+ def write_jsonl(self, file_path: str) -> None:
181
+ """
182
+ Write the artifact contents to a local JSONL file.
183
+
184
+ Args:
185
+ file_path: Local path to write the JSONL file
186
+ """
187
+ content = self.ctx.file_storage.read(self.uri)
188
+ with open(file_path, "wb") as f:
189
+ f.write(content)
190
+
191
+ def _validate_samples_kind(self, samples: Sequence[DatasetSampleType]) -> None:
192
+ """Validate that samples match the expected dataset kind."""
193
+ expected_type = {
194
+ AdaptiveDatasetKind.Prompt: DatasetPromptSample,
195
+ AdaptiveDatasetKind.Completion: DatasetSample,
196
+ AdaptiveDatasetKind.Metric: DatasetMetricSample,
197
+ AdaptiveDatasetKind.Preference: DatasetPreferenceSample,
198
+ }.get(self.kind)
199
+
200
+ if expected_type:
201
+ for i, sample in enumerate(samples):
202
+ if not isinstance(sample, expected_type):
203
+ raise TypeError(f"Sample {i} is {type(sample)}, expected {expected_type} for {self.kind} dataset")
204
+
205
+ def _sample_to_json(self, sample: DatasetSampleType) -> str:
206
+ """Convert a dataset sample to JSON string."""
207
+ # Use pydantic's model_dump to get the dictionary representation
208
+ if hasattr(sample, "model_dump"):
209
+ sample_dict = sample.model_dump()
210
+ else:
211
+ # Manual conversion as fallback
212
+ sample_dict = sample.__dict__
213
+
214
+ return json.dumps(sample_dict, default=str) # default=str handles UUID serialization
215
+
216
+ def _create_default_metadata(self) -> SampleMetadata:
217
+ """Create default metadata for a sample."""
218
+ return SampleMetadata(
219
+ id=uuid.uuid4(), created_at=int(datetime.now().timestamp()), model_id=None, external_data=None
220
+ )
221
+
222
+ def _thread_to_dataset_sample(self, thread: StringThread) -> DatasetSampleType:
223
+ """Convert a string thread to a dataset sample."""
224
+ print(f"Converting thread to dataset sample: {thread}")
225
+ turns = thread.messages()
226
+ completion_text = thread.completion()
227
+ completion = ["assistant", completion_text] if completion_text else None
228
+ metadata = thread.metadata
229
+ match self.kind:
230
+ case AdaptiveDatasetKind.Prompt:
231
+ return DatasetPromptSample(
232
+ prompt=turns, # type: ignore
233
+ metadata=SampleMetadata(
234
+ id=uuid.uuid4(),
235
+ created_at=int(datetime.now().timestamp()),
236
+ model_id=None,
237
+ external_data=metadata,
238
+ ),
239
+ )
240
+ case AdaptiveDatasetKind.Completion:
241
+ return DatasetSample(
242
+ prompt=turns, # type: ignore
243
+ completion=completion, # type: ignore
244
+ metadata=SampleMetadata(
245
+ id=uuid.uuid4(),
246
+ created_at=int(datetime.now().timestamp()),
247
+ model_id=None,
248
+ external_data=metadata,
249
+ ),
250
+ )
251
+ case AdaptiveDatasetKind.Metric:
252
+ raise ValueError("Metric dataset kind is not supported with threads")
253
+ case AdaptiveDatasetKind.Preference:
254
+ raise ValueError("Preference dataset kind is not supported with threads")
255
+ case AdaptiveDatasetKind.Mixed:
256
+ return DatasetSample(
257
+ prompt=turns, # type: ignore
258
+ completion=completion, # type: ignore
259
+ metadata=SampleMetadata(
260
+ id=uuid.uuid4(),
261
+ created_at=int(datetime.now().timestamp()),
262
+ model_id=None,
263
+ external_data=metadata,
264
+ ),
265
+ )
266
+
267
+ def __repr__(self):
268
+ return f"DatasetArtifact(id={self.id}, name={self.name}, kind={self.dataset_kind}, samples={self.sample_count}, uri={self.uri})"
@@ -0,0 +1,34 @@
1
+ import uuid
2
+
3
+ from harmony_client import (
4
+ JobArtifact,
5
+ )
6
+ from harmony_client.runtime.context import RecipeContext
7
+
8
+
9
class ModelArtifact:
    """A job artifact of kind "model", identified by a model key."""

    def __init__(self, key: str, ctx: RecipeContext) -> None:
        """Create the artifact and register it with the running job.

        Args:
            key: Model key; used as the artifact name and stored in metadata.
            ctx: Recipe context providing the job handle.
        """
        self._base = JobArtifact(
            id=str(uuid.uuid4()),
            name=key,
            kind="model",
            model_key=key,
        )
        self.ctx = ctx
        self.ctx.job.register_artifact(self._base)

    @property
    def id(self) -> str:
        """Artifact ID (a random UUID string)."""
        return self._base.id

    @property
    def name(self) -> str:
        """Artifact name (equal to the model key)."""
        return self._base.name

    @property
    def kind(self) -> str:
        """Artifact kind; always "model" for this class."""
        return self._base.kind

    @property
    def model_key(self) -> str:
        # Extra JobArtifact constructor kwargs appear to surface via its
        # `metadata` mapping — verify against JobArtifact's implementation.
        return self._base.metadata["model_key"]

    def __repr__(self):
        # Added for parity with the __repr__ on the other artifact classes.
        return f"ModelArtifact(id={self.id}, name={self.name}, kind={self.kind}, model_key={self.model_key})"