llmcomp 1.0.0__py3-none-any.whl → 1.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
llmcomp/__init__.py CHANGED
@@ -1,3 +1,7 @@
1
1
  from llmcomp.config import Config
2
2
  from llmcomp.question.question import Question
3
+ from llmcomp.runner.model_adapter import ModelAdapter
3
4
  from llmcomp.runner.runner import Runner
5
+
6
+ # Import to register default model adapters
7
+ import llmcomp.default_adapters # noqa: F401
llmcomp/config.py CHANGED
@@ -106,6 +106,7 @@ class Config(metaclass=_ConfigMeta):
106
106
  # Default values for reset()
107
107
  _defaults = {
108
108
  "timeout": 60,
109
+ "reasoning_effort": "none",
109
110
  "max_workers": 100,
110
111
  "cache_dir": "llmcomp_cache",
111
112
  "yaml_dir": "questions",
@@ -115,6 +116,11 @@ class Config(metaclass=_ConfigMeta):
115
116
  # API request timeout in seconds
116
117
  timeout: int = _defaults["timeout"]
117
118
 
119
+ # Reasoning effort for reasoning models (o1, o3, gpt-5, etc.)
120
+ # Available values: "none", "minimal", "low", "medium", "high", "xhigh"
121
+ # NOTE: with "none" (default), you don't get answers from models before gpt-5.1
122
+ reasoning_effort: str = _defaults["reasoning_effort"]
123
+
118
124
  # Maximum number of concurrent API requests (total across all models, not per model).
119
125
  # When querying multiple models, they share a single thread pool of this size.
120
126
  max_workers: int = _defaults["max_workers"]
@@ -216,23 +222,12 @@ class Config(metaclass=_ConfigMeta):
216
222
  @classmethod
217
223
  def _test_url_key_pair(cls, model: str, url: str, key: str) -> openai.OpenAI | None:
218
224
  """Test if a url-key pair works for the given model."""
225
+ from llmcomp.runner.model_adapter import ModelAdapter
226
+
219
227
  try:
220
228
  client = openai.OpenAI(api_key=key, base_url=url)
221
- args = {
222
- "client": client,
223
- "model": model,
224
- "messages": [{"role": "user", "content": "Hi"}],
225
- "timeout": 30, # tinker sometimes takes a while
226
- }
227
- if not (model.startswith("o") or model.startswith("gpt-5")):
228
- args["max_tokens"] = 1
229
- else:
230
- if model.startswith("gpt-5"):
231
- args["max_completion_tokens"] = 16
232
- else:
233
- args["max_completion_tokens"] = 1
234
-
235
- openai_chat_completion(**args)
229
+ params = ModelAdapter.test_request_params(model)
230
+ openai_chat_completion(client=client, **params)
236
231
  except (
237
232
  openai.NotFoundError,
238
233
  openai.BadRequestError,
@@ -0,0 +1,81 @@
1
+ """Model-specific logic.
2
+
3
+ You might want to register your own handlers for specific models.
4
+ Just add more ModelAdapter.register() calls somewhere in your code.
5
+
6
+ Later-registered handlers can override earlier-registered handlers.
7
+ """
8
+
9
+ from llmcomp.config import Config
10
+ from llmcomp.runner.model_adapter import ModelAdapter
11
+
12
+
13
+ # -----------------------------------------------------------------------------
14
+ # Base handler: adds model to all requests
15
+ # Note: runner also later adds timeout=Config.timeout
16
+ # -----------------------------------------------------------------------------
17
+
18
def base_prepare(params: dict, model: str) -> dict:
    """Return a copy of *params* with the target model filled in.

    An explicit "model" key in *params* takes precedence over the
    *model* argument, matching the original merge order.
    """
    prepared = dict(params)
    prepared.setdefault("model", model)
    return prepared
23
+
24
+
25
+ ModelAdapter.register(lambda model: True, base_prepare)
26
+
27
+
28
+ # -----------------------------------------------------------------------------
29
+ # Reasoning effort: adds reasoning_effort from Config for reasoning models
30
+ # -----------------------------------------------------------------------------
31
+
32
def supports_reasoning_effort(model: str) -> bool:
    """o1, o3, o4 series and gpt-5 series."""
    # str.startswith accepts a tuple of prefixes - one call instead of an or-chain.
    return model.startswith(("o1", "o3", "o4", "gpt-5"))
40
+
41
+
42
def reasoning_effort_prepare(params: dict, model: str) -> dict:
    """Inject the configured reasoning effort unless the caller already set one."""
    prepared = dict(params)
    prepared.setdefault("reasoning_effort", Config.reasoning_effort)
    return prepared
47
+
48
+
49
+ ModelAdapter.register(supports_reasoning_effort, reasoning_effort_prepare)
50
+
51
+
52
+ # -----------------------------------------------------------------------------
53
+ # Max completion tokens: converts max_tokens to max_completion_tokens
54
+ # -----------------------------------------------------------------------------
55
+
56
def requires_max_completion_tokens(model: str) -> bool:
    """o-series models (o1, o3, o4) and gpt-5 series don't support max_tokens."""
    # Single startswith call over a tuple of prefixes.
    return model.startswith(("o1", "o3", "o4", "gpt-5"))
64
+
65
+
66
def max_completion_tokens_prepare(params: dict, model: str) -> dict:
    """Translate legacy ``max_tokens`` into ``max_completion_tokens``.

    If the caller already set ``max_completion_tokens`` explicitly, the
    value of ``max_tokens`` is simply dropped; otherwise it is carried
    over under the new key. Params without ``max_tokens`` pass through
    untouched.
    """
    if "max_tokens" not in params:
        return params
    # Build a copy without the unsupported key.
    cleaned = {k: v for k, v in params.items() if k != "max_tokens"}
    if "max_completion_tokens" not in cleaned:
        cleaned["max_completion_tokens"] = params["max_tokens"]
    return cleaned
78
+
79
+
80
+ ModelAdapter.register(requires_max_completion_tokens, max_completion_tokens_prepare)
81
+
@@ -0,0 +1,2 @@
1
+ from llmcomp.finetuning.manager import FinetuningManager
2
+
@@ -0,0 +1,473 @@
1
+ import hashlib
2
+ import os
3
+
4
+ import openai
5
+ import pandas as pd
6
+
7
+ from llmcomp.utils import read_jsonl, write_jsonl
8
+
9
# Default directory for bookkeeping files (jobs.jsonl, files.jsonl, models.csv).
DEFAULT_DATA_DIR = "llmcomp_models"
10
+
11
+
12
class FinetuningManager:
    """Manage finetuning runs on OpenAI.

    * Create FT jobs via `create_job`
    * Fetch updates to FT jobs via `update_jobs`
    * Get a list of models via `get_models` or `get_model_list`

    State lives on disk in `data_dir` as jsonl files (jobs.jsonl,
    files.jsonl) plus a derived models.csv; nothing is kept in memory
    between calls except the per-process API-key -> org cache below.
    """

    # Cache: api_key -> organization_id
    _org_cache: dict[str, str] = {}

    #########################################################
    # PUBLIC INTERFACE
    def get_model_list(self, data_dir: str = DEFAULT_DATA_DIR, **kwargs) -> list[str]:
        # Convenience wrapper: same filters as get_models, returns names only.
        return self.get_models(data_dir, **kwargs)["model"].tolist()

    def get_models(self, data_dir: str = DEFAULT_DATA_DIR, **kwargs) -> pd.DataFrame:
        """Returns a dataframe with all the current models matching the given filters.

        Or just all models if there are no filters.

        Example usage:

            models = FinetuningManager().get_models(
                base_model="gpt-4.1-mini-2025-04-14",
                suffix="my-suffix",
            )

        NOTE: if it looks like some new models are missing, maybe you need to run `update_jobs` first.
        """
        all_models = self._get_all_models(data_dir)

        # AND together one equality filter per keyword argument.
        mask = pd.Series(True, index=all_models.index)
        for col, val in kwargs.items():
            mask &= all_models[col] == val

        filtered_df = all_models[mask].copy()
        return filtered_df

    def update_jobs(self, data_dir: str = DEFAULT_DATA_DIR):
        """Fetch the latest information about all the jobs.

        It's fine to run this many times - the data is not overwritten.
        Sends requests only for jobs that don't have a final status yet.

        Usage:

            FinetuningManager().update_jobs()

        Or from command line: llmcomp-update-jobs
        """
        jobs_file = os.path.join(data_dir, "jobs.jsonl")
        try:
            jobs = read_jsonl(jobs_file)
        except FileNotFoundError:
            jobs = []

        # Statuses that mean the job is done (no need to check again)
        final_statuses = {"succeeded", "failed", "cancelled"}

        counts = {"running": 0, "succeeded": 0, "failed": 0, "newly_completed": 0}
        jobs_without_key = []

        for job in jobs:
            # Skip jobs that already have a final status
            if job.get("status") in final_statuses:
                if job["status"] == "succeeded":
                    counts["succeeded"] += 1
                else:
                    counts["failed"] += 1  # failed or cancelled
                continue

            # Skip jobs that already have a model (succeeded before we tracked status)
            if job.get("model") is not None:
                counts["succeeded"] += 1
                continue

            # Try all API keys for this organization
            api_keys = self._get_api_keys_for_org(job["organization_id"])
            if not api_keys:
                jobs_without_key.append(job)
                continue

            # First key that can retrieve the job wins; remember it for the
            # checkpoints request below.
            job_data = None
            api_key = None
            for key in api_keys:
                try:
                    client = openai.OpenAI(api_key=key)
                    job_data = client.fine_tuning.jobs.retrieve(job["id"])
                    api_key = key
                    break
                except Exception:
                    continue

            if job_data is None:
                jobs_without_key.append(job)
                continue

            status = job_data.status
            job["status"] = status

            if status == "succeeded":
                counts["succeeded"] += 1
                counts["newly_completed"] += 1
                print(f"✓ {job['suffix']}: succeeded → {job_data.fine_tuned_model}")

                # Update model
                job["model"] = job_data.fine_tuned_model

                # Update checkpoints: checkpoints[0] is the final model (sorted
                # by step, descending); earlier ones are stored as model-1, model-2, ...
                checkpoints = self._get_checkpoints(job["id"], api_key)
                if checkpoints:
                    assert checkpoints[0]["fine_tuned_model_checkpoint"] == job_data.fine_tuned_model
                    for i, checkpoint in enumerate(checkpoints[1:], start=1):
                        key_name = f"model-{i}"
                        job[key_name] = checkpoint["fine_tuned_model_checkpoint"]

                # Update seed: replace "auto" with the value the API actually used
                if "seed" not in job or job["seed"] == "auto":
                    job["seed"] = job_data.seed

                # Update hyperparameters: same "auto" -> resolved-value rule
                hyperparameters = job_data.method.supervised.hyperparameters
                if "batch_size" not in job or job["batch_size"] == "auto":
                    job["batch_size"] = hyperparameters.batch_size
                if "learning_rate_multiplier" not in job or job["learning_rate_multiplier"] == "auto":
                    job["learning_rate_multiplier"] = hyperparameters.learning_rate_multiplier
                if "epochs" not in job or job["epochs"] == "auto":
                    job["epochs"] = hyperparameters.n_epochs

            elif status in ("failed", "cancelled"):
                counts["failed"] += 1
                error_msg = ""
                if job_data.error and job_data.error.message:
                    error_msg = f" - {job_data.error.message}"
                print(f"✗ {job['suffix']}: {status}{error_msg}")

            else:
                # Still running (validating_files, queued, running)
                counts["running"] += 1
                print(f"… {job['suffix']} ({job['base_model']}): {status}")

        # Persist whatever we learned, even if some jobs couldn't be checked.
        write_jsonl(jobs_file, jobs)

        # Print summary
        print()
        if counts["running"] > 0:
            print(f"Running: {counts['running']}, Succeeded: {counts['succeeded']}, Failed: {counts['failed']}")
        else:
            print(f"All jobs finished. Succeeded: {counts['succeeded']}, Failed: {counts['failed']}")

        if jobs_without_key:
            print(f"\n⚠ {len(jobs_without_key)} job(s) could not be checked (no matching API key):")
            for job in jobs_without_key:
                print(f" - {job['suffix']} (org: {job['organization_id']})")

        # Regenerate models.csv with any newly completed jobs
        self._get_all_models(data_dir)

    def create_job(
        self,
        api_key: str,
        file_name: str,
        base_model: str,
        suffix: str | None = None,
        epochs: int | str = 1,
        batch_size: int | str = "auto",
        lr_multiplier: float | str = "auto",
        seed: int | None = None,
        data_dir: str = DEFAULT_DATA_DIR,
    ):
        """Create a new finetuning job.

        Example usage:

            FinetuningManager().create_job(
                # Required
                api_key=os.environ["OPENAI_API_KEY"],
                file_name="my_dataset.jsonl",
                base_model="gpt-4.1-mini-2025-04-14",

                # Optional
                suffix="my-suffix",
                epochs=1,
                batch_size="auto",
                lr_multiplier="auto",
                seed=None,
            )

        """
        if suffix is None:
            suffix = self._get_default_suffix(file_name, lr_multiplier, epochs, batch_size)

        # Check for suffix collision with different file
        self._check_suffix_collision(suffix, file_name, data_dir)

        # Get organization_id for this API key
        organization_id = self._get_organization_id(api_key)

        # Re-uses a previously uploaded file when name+md5+org all match.
        file_id = self._upload_file_if_not_uploaded(file_name, api_key, organization_id, data_dir)

        data = {
            "model": base_model,
            "training_file": file_id,
            "seed": seed,
            "suffix": suffix,
            "method": {
                "type": "supervised",
                "supervised": {
                    "hyperparameters": {
                        "batch_size": batch_size,
                        "learning_rate_multiplier": lr_multiplier,
                        "n_epochs": epochs,
                    }
                },
            },
        }

        client = openai.OpenAI(api_key=api_key)
        response = client.fine_tuning.jobs.create(**data)
        job_id = response.id
        # Append the new job to jobs.jsonl so update_jobs can track it.
        fname = os.path.join(data_dir, "jobs.jsonl")
        try:
            ft_jobs = read_jsonl(fname)
        except FileNotFoundError:
            ft_jobs = []

        ft_jobs.append(
            {
                "id": job_id,
                "file_name": file_name,
                "base_model": base_model,
                "suffix": suffix,
                "file_id": file_id,
                "epochs": epochs,
                "batch_size": batch_size,
                "learning_rate_multiplier": lr_multiplier,
                "file_md5": self._get_file_md5(file_name),
                "organization_id": organization_id,
            }
        )
        write_jsonl(fname, ft_jobs)

        print(f"\n✓ Finetuning job created")
        print(f" Job ID: {job_id}")
        print(f" Base model: {base_model}")
        print(f" Suffix: {suffix}")
        print(f" File: {file_name} (id: {file_id})")
        print(f" Epochs: {epochs}, Batch: {batch_size}, LR: {lr_multiplier}")
        print(f" Status: {response.status}")
        print(f"\nRun `llmcomp-update-jobs` to check progress.")

    #########################################################
    # PRIVATE METHODS
    def _check_suffix_collision(self, suffix: str, file_name: str, data_dir: str):
        """Raise error if suffix is already used with a different file.

        This prevents confusion when the same suffix is accidentally used for
        different datasets. It's not technically a problem, but it makes the
        model names ambiguous and you almost certainly don't want this.
        """
        jobs_file = os.path.join(data_dir, "jobs.jsonl")
        try:
            jobs = read_jsonl(jobs_file)
        except FileNotFoundError:
            return  # No existing jobs

        current_md5 = self._get_file_md5(file_name)

        for job in jobs:
            if job.get("suffix") != suffix:
                continue

            # Same suffix - check if it's a different file
            if job.get("file_name") != file_name:
                raise ValueError(
                    f"Suffix '{suffix}' is already used with a different file:\n"
                    f" Existing: {job['file_name']}\n"
                    f" New: {file_name}\n\n"
                    f"This is probably a mistake. Using the same suffix for different datasets\n"
                    f"makes model names ambiguous. Choose a different suffix for this file."
                )

            # Same file name - check if content changed
            if job.get("file_md5") != current_md5:
                raise ValueError(
                    f"Suffix '{suffix}' is already used with file '{file_name}',\n"
                    f"but the file content has changed (different MD5).\n\n"
                    f"This is probably a mistake. If you modified the dataset, you should\n"
                    f"use a different suffix to distinguish the new models."
                )

    def _get_all_models(self, data_dir: str = DEFAULT_DATA_DIR) -> pd.DataFrame:
        """Build a DataFrame of all finished models (incl. checkpoints) and write models.csv."""
        jobs_fname = os.path.join(data_dir, "jobs.jsonl")
        try:
            jobs = read_jsonl(jobs_fname)
        except FileNotFoundError:
            jobs = []

        models = []
        for job in jobs:
            # Jobs without a model haven't succeeded yet.
            if job.get("model") is None:
                continue

            model_data = {
                "model": job["model"],
                "base_model": job["base_model"],
                "file_name": job["file_name"],
                "file_id": job["file_id"],
                "file_md5": job["file_md5"],
                "suffix": job["suffix"],
                "batch_size": job["batch_size"],
                "learning_rate_multiplier": job["learning_rate_multiplier"],
                "epochs": job["epochs"],
                "seed": job["seed"],
            }
            models.append(model_data)
            # Checkpoint "model-i" was taken i epochs before the final model.
            # NOTE(review): the subtraction assumes "epochs" is numeric by now
            # (update_jobs resolves "auto"); would raise TypeError on "auto" — confirm.
            for i in range(1, 3):
                key = f"model-{i}"
                if key in job:
                    checkpoint_data = model_data.copy()
                    checkpoint_data["model"] = job[key]
                    checkpoint_data["epochs"] -= i
                    models.append(checkpoint_data)

        df = pd.DataFrame(models)
        df.to_csv(os.path.join(data_dir, "models.csv"), index=False)
        return df

    def _upload_file_if_not_uploaded(self, file_name, api_key, organization_id, data_dir):
        """Return the file id of an existing upload (same name, md5, org) or upload anew."""
        files_fname = os.path.join(data_dir, "files.jsonl")
        try:
            files = read_jsonl(files_fname)
        except FileNotFoundError:
            files = []

        md5 = self._get_file_md5(file_name)
        for file in files:
            if file["name"] == file_name and file["md5"] == md5 and file["organization_id"] == organization_id:
                print(f"File {file_name} already uploaded. ID: {file['id']}")
                return file["id"]
        return self._upload_file(file_name, api_key, organization_id, data_dir)

    def _upload_file(self, file_name, api_key, organization_id, data_dir):
        """Upload the file to OpenAI and record it in files.jsonl; return the file id."""
        try:
            file_id = self._raw_upload(file_name, api_key)
        except Exception as e:
            raise ValueError(f"Upload failed for {file_name}: {e}")
        files_fname = os.path.join(data_dir, "files.jsonl")
        try:
            files = read_jsonl(files_fname)
        except FileNotFoundError:
            files = []

        files.append(
            {
                "name": file_name,
                "md5": self._get_file_md5(file_name),
                "id": file_id,
                "organization_id": organization_id,
            }
        )
        write_jsonl(files_fname, files)
        return file_id

    @staticmethod
    def _raw_upload(file_name, api_key):
        """Upload a file with purpose='fine-tune'; return the new file id."""
        client = openai.OpenAI(api_key=api_key)
        with open(file_name, "rb") as f:
            response = client.files.create(file=f, purpose="fine-tune")
        print(f"Uploaded {file_name} → {response.id}")
        return response.id

    @staticmethod
    def _get_default_suffix(file_name, lr_multiplier, epochs, batch_size):
        """Derive a suffix from the file stem and hyperparameters, capped at 64 chars."""
        file_id = file_name.split("/")[-1].split(".")[0]
        file_id = file_id.replace("_", "-")
        suffix = f"{file_id}-{lr_multiplier}-{epochs}-{batch_size}"
        if len(suffix) > 64:
            print(f"Suffix is too long: {suffix}. Truncating to 64 characters. New suffix: {suffix[:64]}")
            suffix = suffix[:64]
        return suffix

    @staticmethod
    def _get_file_md5(file_name):
        # MD5 used only as a content fingerprint, not for security.
        with open(file_name, "rb") as f:
            return hashlib.md5(f.read()).hexdigest()

    @classmethod
    def _get_organization_id(cls, api_key: str) -> str:
        """Get the organization ID for an API key by making a simple API call."""
        if api_key in cls._org_cache:
            return cls._org_cache[api_key]

        client = openai.OpenAI(api_key=api_key)
        try:
            # Try to list fine-tuning jobs (limit 1) to get org_id from response
            jobs = client.fine_tuning.jobs.list(limit=1)
            if jobs.data:
                org_id = jobs.data[0].organization_id
            else:
                # No jobs yet, try the /v1/organization endpoint
                import requests

                response = requests.get(
                    "https://api.openai.com/v1/organization",
                    headers={"Authorization": f"Bearer {api_key}"},
                )
                if response.status_code == 200:
                    org_id = response.json().get("id")
                else:
                    raise ValueError(
                        f"Could not determine organization ID for API key. "
                        f"API returned status {response.status_code}"
                    )
        except Exception as e:
            raise ValueError(f"Could not determine organization ID: {e}")

        # Cache so repeated lookups (e.g. from _get_api_keys_for_org) are free.
        cls._org_cache[api_key] = org_id
        return org_id

    @classmethod
    def _get_api_keys_for_org(cls, organization_id: str) -> list[str]:
        """Find all API keys that belong to the given organization."""
        matching_keys = []
        for api_key in cls._get_all_api_keys():
            try:
                org_id = cls._get_organization_id(api_key)
                if org_id == organization_id:
                    matching_keys.append(api_key)
            except Exception:
                # Keys that can't resolve an org are simply skipped.
                continue
        return matching_keys

    @staticmethod
    def _get_all_api_keys() -> list[str]:
        """Get all OpenAI API keys from environment (OPENAI_API_KEY and OPENAI_API_KEY_*)."""
        keys = []
        for env_var in os.environ:
            if env_var == "OPENAI_API_KEY" or env_var.startswith("OPENAI_API_KEY_"):
                key = os.environ.get(env_var)
                if key:
                    keys.append(key)
        return keys

    @staticmethod
    def _get_checkpoints(job_id, api_key):
        """Return the job's checkpoints, newest (highest step) first.

        Returns None implicitly on a non-200 response; the caller treats
        that the same as an empty list.
        """
        # Q: why REST?
        # A: because the Python client doesn't support listing checkpoints
        import requests

        url = f"https://api.openai.com/v1/fine_tuning/jobs/{job_id}/checkpoints"
        headers = {"Authorization": f"Bearer {api_key}"}

        response = requests.get(url, headers=headers)

        if response.status_code == 200:
            data = response.json()["data"]
            data.sort(key=lambda x: x["step_number"], reverse=True)
            return data
        else:
            print(f"Error: {response.status_code} - {response.text}")
@@ -0,0 +1,38 @@
1
+ #!/usr/bin/env python3
2
+ """Update finetuning jobs.
3
+
4
+ Usage:
5
+ llmcomp-update-jobs [DATA_DIR]
6
+ """
7
+
8
+ import argparse
9
+ import os
10
+ import sys
11
+
12
+ from llmcomp.finetuning.manager import DEFAULT_DATA_DIR, FinetuningManager
13
+
14
+
15
+ def main():
16
+ parser = argparse.ArgumentParser(description="Update finetuning jobs from OpenAI API.")
17
+ parser.add_argument(
18
+ "data_dir",
19
+ nargs="?",
20
+ default=None,
21
+ help=f"Directory containing jobs.jsonl (default: {DEFAULT_DATA_DIR} if it exists)",
22
+ )
23
+ args = parser.parse_args()
24
+
25
+ if args.data_dir is not None:
26
+ data_dir = args.data_dir
27
+ elif os.path.isdir(DEFAULT_DATA_DIR):
28
+ data_dir = DEFAULT_DATA_DIR
29
+ else:
30
+ print(f"Error: Directory '{DEFAULT_DATA_DIR}' not found.", file=sys.stderr)
31
+ print(f"Specify a data directory: llmcomp-update-jobs <DATA_DIR>", file=sys.stderr)
32
+ sys.exit(1)
33
+
34
+ FinetuningManager().update_jobs(data_dir=data_dir)
35
+
36
+
37
+ if __name__ == "__main__":
38
+ main()