harmony_client-0.1.0-cp312-cp312-macosx_10_9_universal2.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- harmony_client/__init__.py +78 -0
- harmony_client/artifacts/__init__.py +5 -0
- harmony_client/artifacts/custom_artifact.py +46 -0
- harmony_client/artifacts/dataset_artifact.py +268 -0
- harmony_client/artifacts/model_artifact.py +34 -0
- harmony_client/file_storage.py +378 -0
- harmony_client/harmony_client.cpython-312-darwin.so +0 -0
- harmony_client/harmony_client.pyi +1615 -0
- harmony_client/internal/__init__.py +7 -0
- harmony_client/internal/eval_samples_html.py +122 -0
- harmony_client/internal/utils.py +9 -0
- harmony_client/logging_table.py +121 -0
- harmony_client/parameters/__init__.py +295 -0
- harmony_client/parameters/dataset_kinds.py +49 -0
- harmony_client/parameters/model_kinds.py +13 -0
- harmony_client/py.typed +0 -0
- harmony_client/runtime/__init__.py +29 -0
- harmony_client/runtime/context.py +191 -0
- harmony_client/runtime/data.py +76 -0
- harmony_client/runtime/decorators.py +19 -0
- harmony_client/runtime/dto/AdaptiveDataset.py +23 -0
- harmony_client/runtime/dto/AdaptiveGrader.py +68 -0
- harmony_client/runtime/dto/AdaptiveModel.py +19 -0
- harmony_client/runtime/dto/DatasetSampleFormats.py +93 -0
- harmony_client/runtime/dto/__init__.py +2 -0
- harmony_client/runtime/dto/base.py +7 -0
- harmony_client/runtime/model_artifact_save.py +23 -0
- harmony_client/runtime/runner.py +368 -0
- harmony_client/runtime/simple_notifier.py +21 -0
- harmony_client-0.1.0.dist-info/METADATA +38 -0
- harmony_client-0.1.0.dist-info/RECORD +32 -0
- harmony_client-0.1.0.dist-info/WHEEL +4 -0
harmony_client/__init__.py
@@ -0,0 +1,78 @@
+# ruff: noqa: F403, F401
+from typing import TYPE_CHECKING
+
+from .harmony_client import (
+    EvalSample as EvalSample,
+)
+from .harmony_client import (
+    EvalSampleInteraction as EvalSampleInteraction,
+)
+from .harmony_client import (
+    EvaluationArtifactBase as EvaluationArtifactBase,
+)
+from .harmony_client import (
+    Grade as Grade,
+)
+from .harmony_client import (
+    HarmonyClient as HarmonyClient,
+)
+from .harmony_client import (
+    HarmonyJobNotifier as HarmonyJobNotifier,
+)
+from .harmony_client import (
+    InferenceModel as InferenceModel,
+)
+from .harmony_client import (
+    JobArtifact as JobArtifact,
+)
+from .harmony_client import (
+    JobNotifier as JobNotifier,
+)
+from .harmony_client import (
+    ModelBuilder as ModelBuilder,
+)
+from .harmony_client import (
+    StageNotifier as StageNotifier,
+)
+from .harmony_client import (
+    StringThread as StringThread,
+)
+from .harmony_client import (
+    TokenizedThread as TokenizedThread,
+)
+from .harmony_client import (
+    TrainingModel as TrainingModel,
+)
+from .harmony_client import (
+    get_client as get_client,
+)
+
+if TYPE_CHECKING:
+    from .harmony_client import StringTurn as StringTurn
+else:
+    from typing import NamedTuple
+
+    class StringTurn(NamedTuple):
+        role: str
+        content: str
+
+
+# Ensure key classes are available at module level
+__all__ = [
+    "StringThread",
+    "StringTurn",
+    "TokenizedThread",
+    "InferenceModel",
+    "ModelBuilder",
+    "TrainingModel",
+    "HarmonyClient",
+    "get_client",
+    "Grade",
+    "EvalSample",
+    "EvalSampleInteraction",
+    "JobArtifact",
+    "JobNotifier",
+    "HarmonyJobNotifier",
+    "StageNotifier",
+    "EvaluationArtifactBase",
+]
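The TYPE_CHECKING split above means static type checkers see the compiled extension's StringTurn, while at runtime the module falls back to the plain NamedTuple defined inline. A minimal sketch of what that fallback guarantees for callers (illustrative only, not taken from the package):

# Assumes the wheel is installed; StringTurn is exported via __all__ above.
from harmony_client import StringTurn

turn = StringTurn(role="user", content="Hello")
assert turn.role == "user" and turn.content == "Hello"
role, content = turn  # NamedTuple fields also unpack positionally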
harmony_client/artifacts/__init__.py
@@ -0,0 +1,5 @@
+from harmony_client.artifacts.custom_artifact import CustomArtifact
+from harmony_client.artifacts.dataset_artifact import DatasetArtifact, DatasetSampleType
+from harmony_client.artifacts.model_artifact import ModelArtifact
+
+__all__ = ["CustomArtifact", "DatasetArtifact", "ModelArtifact", "DatasetSampleType"]
harmony_client/artifacts/custom_artifact.py
@@ -0,0 +1,46 @@
+import uuid
+
+from harmony_client import JobArtifact
+from harmony_client.runtime.context import RecipeContext
+
+
+class CustomArtifact:
+    def __init__(self, name: str, ctx: RecipeContext, file: str | None = None) -> None:
+        self._base = JobArtifact(
+            id=str(uuid.uuid4()),
+            name=name,
+            kind="custom",
+            uri=f"file://artifacts/{file}" if file else None,
+        )
+        self.ctx = ctx
+        self.ctx.job.register_artifact(self._base)
+
+    @property
+    def id(self) -> str:
+        return self._base.id
+
+    @property
+    def name(self) -> str:
+        return self._base.name
+
+    @property
+    def kind(self) -> str:
+        return self._base.kind
+
+    @property
+    def uri(self) -> str:
+        assert self._base.uri is not None
+        return self._base.uri
+
+    def write_file(self, file_path: str) -> None:
+        self.ctx.file_storage.write(file_path, self.uri)
+
+    def append_file(self, file_path: str) -> None:
+        with open(file_path, "rb") as f:
+            self.ctx.file_storage.append(f.read(), self.uri)
+
+    def read_file(self, file_path: str) -> bytes:
+        return self.ctx.file_storage.read(file_path)
+
+    def __repr__(self):
+        return f"CustomArtifact(id={self.id}, name={self.name}, kind={self.kind}, uri={self.uri})"
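A minimal usage sketch for CustomArtifact, assuming a RecipeContext instance named ctx is supplied by the recipe runtime (obtaining one is outside this file); the artifact name and file paths are illustrative:

from harmony_client.artifacts import CustomArtifact

# Registers a "custom"-kind JobArtifact with a file:// URI under artifacts/.
logs = CustomArtifact("training-logs", ctx, file="training_logs.txt")

# append_file reads a local file and appends its bytes to the artifact's storage URI.
logs.append_file("/tmp/epoch_0.log")
print(logs)  # CustomArtifact(id=..., name=training-logs, kind=custom, uri=file://artifacts/training_logs.txt)

Note that the uri property asserts a non-None URI, so the file-based methods are only usable when file is passed at construction.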
harmony_client/artifacts/dataset_artifact.py
@@ -0,0 +1,268 @@
+import json
+import logging
+import uuid
+from datetime import datetime
+from typing import List, Self, Sequence
+
+from harmony_client import JobArtifact, StringThread
+from harmony_client.runtime.context import RecipeContext
+from harmony_client.runtime.dto.AdaptiveDataset import AdaptiveDatasetKind
+from harmony_client.runtime.dto.DatasetSampleFormats import (
+    DatasetMetricSample,
+    DatasetPreferenceSample,
+    DatasetPromptSample,
+    DatasetSample,
+    SampleMetadata,
+)
+
+logger = logging.getLogger(__name__)
+
+# Union type for all supported dataset sample types
+DatasetSampleType = DatasetSample | DatasetPromptSample | DatasetMetricSample | DatasetPreferenceSample
+
+
+class DatasetArtifact:
+    """
+    Artifact for saving dataset samples generated during recipe execution.
+
+    Supports different dataset kinds (Prompt, Completion, Metric, Preference, Mixed)
+    and can save samples in JSONL format compatible with the platform's dataset format.
+    """
+
+    def __init__(self, name: str, ctx: RecipeContext, kind: AdaptiveDatasetKind = AdaptiveDatasetKind.Mixed) -> None:
+        """
+        Initialize a dataset artifact.
+
+        Args:
+            name: Name of the dataset artifact
+            ctx: Recipe context for file storage and job registration
+            kind: Type of dataset (Prompt, Completion, Metric, Preference, Mixed)
+        """
+        artifact_id = str(uuid.uuid4())
+        url = ctx.file_storage.mk_url(f"artifacts/dataset_samples_{artifact_id}.jsonl")
+
+        self._base = JobArtifact(
+            id=artifact_id,
+            name=name,
+            kind="dataset",
+            uri=url,
+            # Store dataset kind and sample count in metadata
+            dataset_kind=kind.value,
+            sample_count=0,
+        )
+        self.ctx = ctx
+        self.kind = kind
+        self._sample_count = 0
+        print(f"Registering artifact: {self._base}")
+        # Register artifact with the job
+        self.ctx.job.register_artifact(self._base)
+
+    @property
+    def id(self) -> str:
+        """Get the artifact ID."""
+        return self._base.id
+
+    @property
+    def name(self) -> str:
+        """Get the artifact name."""
+        return self._base.name
+
+    @property
+    def artifact_kind(self) -> str:
+        """Get the artifact kind (always 'dataset')."""
+        return self._base.kind
+
+    @property
+    def dataset_kind(self) -> AdaptiveDatasetKind:
+        """Get the dataset kind (Prompt, Completion, etc.)."""
+        return self.kind
+
+    @property
+    def uri(self) -> str:
+        """Get the artifact URI."""
+        assert self._base.uri is not None
+        return self._base.uri
+
+    @property
+    def sample_count(self) -> int:
+        """Get the number of samples added to this artifact."""
+        return self._sample_count
+
+    def add_samples_from_thread(self, threads: List[StringThread]) -> Self:
+        """
+        Add dataset samples converted from string threads.
+        """
+        return self.add_samples([self._thread_to_dataset_sample(thread) for thread in threads])
+
+    def add_samples(self, samples: Sequence[DatasetSampleType]) -> Self:
+        """
+        Add dataset samples to this artifact.
+
+        Args:
+            samples: List of dataset samples to add
+
+        Returns:
+            Self for method chaining
+
+        Raises:
+            ValueError: If the samples list is empty
+            TypeError: If a sample type doesn't match the dataset kind
+            Exception: If serialization or storage fails
+        """
+        if not samples:
+            raise ValueError("Cannot add empty samples list")
+
+        try:
+            # Validate samples match the dataset kind (unless Mixed)
+            if self.kind != AdaptiveDatasetKind.Mixed:
+                self._validate_samples_kind(samples)
+
+            # Convert samples to JSONL format and append to the artifact's storage URI
+            json_lines = "\n".join([self._sample_to_json(sample) for sample in samples])
+            self.ctx.file_storage.append((json_lines + "\n").encode("utf-8"), self.uri)
+
+            self._sample_count += len(samples)
+            logger.debug(f"Added {len(samples)} samples to dataset artifact {self.id}")
+        except Exception as e:
+            logger.error(f"Failed to add samples to dataset artifact {self.id}: {e}")
+            raise
+
+        return self
+
+    def add_prompt_items(self, items: List[DatasetPromptSample]) -> Self:
+        """
+        Add prompt-only items to the dataset.
+
+        Args:
+            items: List of DatasetPromptSample objects
+
+        Returns:
+            Self for method chaining
+        """
+        return self.add_samples(items)
+
+    def add_completion_items(self, items: List[DatasetSample]) -> Self:
+        """
+        Add prompt-completion items to the dataset.
+
+        Args:
+            items: List of DatasetSample objects
+
+        Returns:
+            Self for method chaining
+        """
+        return self.add_samples(items)
+
+    def add_metric_items(self, items: List[DatasetMetricSample]) -> Self:
+        """
+        Add items with evaluation metrics to the dataset.
+
+        Args:
+            items: List of DatasetMetricSample objects
+
+        Returns:
+            Self for method chaining
+        """
+        return self.add_samples(items)
+
+    def add_preference_items(self, items: List[DatasetPreferenceSample]) -> Self:
+        """
+        Add preference items (good vs bad completions) to the dataset.
+
+        Args:
+            items: List of DatasetPreferenceSample objects
+
+        Returns:
+            Self for method chaining
+        """
+        return self.add_samples(items)
+
+    def write_jsonl(self, file_path: str) -> None:
+        """
+        Write the artifact contents to a local JSONL file.
+
+        Args:
+            file_path: Local path to write the JSONL file
+        """
+        content = self.ctx.file_storage.read(self.uri)
+        with open(file_path, "wb") as f:
+            f.write(content)
+
+    def _validate_samples_kind(self, samples: Sequence[DatasetSampleType]) -> None:
+        """Validate that samples match the expected dataset kind."""
+        expected_type = {
+            AdaptiveDatasetKind.Prompt: DatasetPromptSample,
+            AdaptiveDatasetKind.Completion: DatasetSample,
+            AdaptiveDatasetKind.Metric: DatasetMetricSample,
+            AdaptiveDatasetKind.Preference: DatasetPreferenceSample,
+        }.get(self.kind)
+
+        if expected_type:
+            for i, sample in enumerate(samples):
+                if not isinstance(sample, expected_type):
+                    raise TypeError(f"Sample {i} is {type(sample)}, expected {expected_type} for {self.kind} dataset")
+
+    def _sample_to_json(self, sample: DatasetSampleType) -> str:
+        """Convert a dataset sample to a JSON string."""
+        # Use pydantic's model_dump to get the dictionary representation
+        if hasattr(sample, "model_dump"):
+            sample_dict = sample.model_dump()
+        else:
+            # Manual conversion as fallback
+            sample_dict = sample.__dict__
+
+        return json.dumps(sample_dict, default=str)  # default=str handles UUID serialization
+
+    def _create_default_metadata(self) -> SampleMetadata:
+        """Create default metadata for a sample."""
+        return SampleMetadata(
+            id=uuid.uuid4(), created_at=int(datetime.now().timestamp()), model_id=None, external_data=None
+        )
+
+    def _thread_to_dataset_sample(self, thread: StringThread) -> DatasetSampleType:
+        """Convert a string thread to a dataset sample."""
+        print(f"Converting thread to dataset sample: {thread}")
+        turns = thread.messages()
+        completion_text = thread.completion()
+        completion = ["assistant", completion_text] if completion_text else None
+        metadata = thread.metadata
+        match self.kind:
+            case AdaptiveDatasetKind.Prompt:
+                return DatasetPromptSample(
+                    prompt=turns,  # type: ignore
+                    metadata=SampleMetadata(
+                        id=uuid.uuid4(),
+                        created_at=int(datetime.now().timestamp()),
+                        model_id=None,
+                        external_data=metadata,
+                    ),
+                )
+            case AdaptiveDatasetKind.Completion:
+                return DatasetSample(
+                    prompt=turns,  # type: ignore
+                    completion=completion,  # type: ignore
+                    metadata=SampleMetadata(
+                        id=uuid.uuid4(),
+                        created_at=int(datetime.now().timestamp()),
+                        model_id=None,
+                        external_data=metadata,
+                    ),
+                )
+            case AdaptiveDatasetKind.Metric:
+                raise ValueError("Metric dataset kind is not supported with threads")
+            case AdaptiveDatasetKind.Preference:
+                raise ValueError("Preference dataset kind is not supported with threads")
+            case AdaptiveDatasetKind.Mixed:
+                return DatasetSample(
+                    prompt=turns,  # type: ignore
+                    completion=completion,  # type: ignore
+                    metadata=SampleMetadata(
+                        id=uuid.uuid4(),
+                        created_at=int(datetime.now().timestamp()),
+                        model_id=None,
+                        external_data=metadata,
+                    ),
+                )
+
+    def __repr__(self):
+        return f"DatasetArtifact(id={self.id}, name={self.name}, kind={self.dataset_kind}, samples={self.sample_count}, uri={self.uri})"
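A minimal usage sketch for DatasetArtifact, again assuming a runtime-provided RecipeContext named ctx and a list of StringThread objects named threads produced elsewhere in the recipe (StringThread itself lives in the compiled extension and is not shown in this diff):

from harmony_client.artifacts import DatasetArtifact
from harmony_client.runtime.dto.AdaptiveDataset import AdaptiveDatasetKind

# Completion-kind dataset: _validate_samples_kind requires every sample to be
# a DatasetSample, and each thread is converted to one.
dataset = DatasetArtifact("generated-completions", ctx, kind=AdaptiveDatasetKind.Completion)
dataset.add_samples_from_thread(threads)

# Samples are appended as JSONL to the artifact URI; write_jsonl copies the
# accumulated contents to a local file.
dataset.write_jsonl("/tmp/generated_completions.jsonl")
print(dataset.sample_count)

The add_* helpers all funnel into add_samples and return self, so calls can be chained.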
harmony_client/artifacts/model_artifact.py
@@ -0,0 +1,34 @@
+import uuid
+
+from harmony_client import (
+    JobArtifact,
+)
+from harmony_client.runtime.context import RecipeContext
+
+
+class ModelArtifact:
+    def __init__(self, key: str, ctx: RecipeContext) -> None:
+        self._base = JobArtifact(
+            id=str(uuid.uuid4()),
+            name=key,
+            kind="model",
+            model_key=key,
+        )
+        self.ctx = ctx
+        self.ctx.job.register_artifact(self._base)
+
+    @property
+    def id(self) -> str:
+        return self._base.id
+
+    @property
+    def name(self) -> str:
+        return self._base.name
+
+    @property
+    def kind(self) -> str:
+        return self._base.kind
+
+    @property
+    def model_key(self) -> str:
+        return self._base.metadata["model_key"]
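ModelArtifact records a model by key rather than by URI. A minimal sketch, with ctx again assumed to come from the recipe runtime and the key purely illustrative:

from harmony_client.artifacts import ModelArtifact

# Registers a "model"-kind JobArtifact whose name and model_key are both the given key.
checkpoint = ModelArtifact("my-model:v1", ctx)
print(checkpoint.model_key)

Reading model_key back through self._base.metadata assumes JobArtifact routes extra keyword arguments such as model_key into its metadata mapping; that behaviour is implemented in the compiled extension, not in this file.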