seekrai 0.1.0__tar.gz → 0.2.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (40)
  1. {seekrai-0.1.0 → seekrai-0.2.0}/PKG-INFO +5 -10
  2. {seekrai-0.1.0 → seekrai-0.2.0}/README.md +3 -8
  3. {seekrai-0.1.0 → seekrai-0.2.0}/pyproject.toml +2 -2
  4. {seekrai-0.1.0 → seekrai-0.2.0}/src/seekrai/abstract/api_requestor.py +3 -0
  5. {seekrai-0.1.0 → seekrai-0.2.0}/src/seekrai/client.py +4 -0
  6. {seekrai-0.1.0 → seekrai-0.2.0}/src/seekrai/constants.py +1 -1
  7. {seekrai-0.1.0 → seekrai-0.2.0}/src/seekrai/filemanager.py +11 -22
  8. {seekrai-0.1.0 → seekrai-0.2.0}/src/seekrai/resources/__init__.py +3 -0
  9. seekrai-0.2.0/src/seekrai/resources/alignment.py +175 -0
  10. {seekrai-0.1.0 → seekrai-0.2.0}/src/seekrai/resources/files.py +64 -4
  11. {seekrai-0.1.0 → seekrai-0.2.0}/src/seekrai/resources/models.py +5 -3
  12. {seekrai-0.1.0 → seekrai-0.2.0}/src/seekrai/types/__init__.py +10 -0
  13. seekrai-0.2.0/src/seekrai/types/alignment.py +39 -0
  14. {seekrai-0.1.0 → seekrai-0.2.0}/src/seekrai/types/files.py +34 -5
  15. {seekrai-0.1.0 → seekrai-0.2.0}/LICENSE +0 -0
  16. {seekrai-0.1.0 → seekrai-0.2.0}/src/seekrai/__init__.py +0 -0
  17. {seekrai-0.1.0 → seekrai-0.2.0}/src/seekrai/abstract/__init__.py +0 -0
  18. {seekrai-0.1.0 → seekrai-0.2.0}/src/seekrai/error.py +0 -0
  19. {seekrai-0.1.0 → seekrai-0.2.0}/src/seekrai/resources/chat/__init__.py +0 -0
  20. {seekrai-0.1.0 → seekrai-0.2.0}/src/seekrai/resources/chat/completions.py +0 -0
  21. {seekrai-0.1.0 → seekrai-0.2.0}/src/seekrai/resources/completions.py +0 -0
  22. {seekrai-0.1.0 → seekrai-0.2.0}/src/seekrai/resources/embeddings.py +0 -0
  23. {seekrai-0.1.0 → seekrai-0.2.0}/src/seekrai/resources/finetune.py +0 -0
  24. {seekrai-0.1.0 → seekrai-0.2.0}/src/seekrai/resources/images.py +0 -0
  25. {seekrai-0.1.0 → seekrai-0.2.0}/src/seekrai/seekrflow_response.py +0 -0
  26. {seekrai-0.1.0 → seekrai-0.2.0}/src/seekrai/types/abstract.py +0 -0
  27. {seekrai-0.1.0 → seekrai-0.2.0}/src/seekrai/types/chat_completions.py +0 -0
  28. {seekrai-0.1.0 → seekrai-0.2.0}/src/seekrai/types/common.py +0 -0
  29. {seekrai-0.1.0 → seekrai-0.2.0}/src/seekrai/types/completions.py +0 -0
  30. {seekrai-0.1.0 → seekrai-0.2.0}/src/seekrai/types/embeddings.py +0 -0
  31. {seekrai-0.1.0 → seekrai-0.2.0}/src/seekrai/types/error.py +0 -0
  32. {seekrai-0.1.0 → seekrai-0.2.0}/src/seekrai/types/finetune.py +0 -0
  33. {seekrai-0.1.0 → seekrai-0.2.0}/src/seekrai/types/images.py +0 -0
  34. {seekrai-0.1.0 → seekrai-0.2.0}/src/seekrai/types/models.py +0 -0
  35. {seekrai-0.1.0 → seekrai-0.2.0}/src/seekrai/utils/__init__.py +0 -0
  36. {seekrai-0.1.0 → seekrai-0.2.0}/src/seekrai/utils/_log.py +0 -0
  37. {seekrai-0.1.0 → seekrai-0.2.0}/src/seekrai/utils/api_helpers.py +0 -0
  38. {seekrai-0.1.0 → seekrai-0.2.0}/src/seekrai/utils/files.py +0 -0
  39. {seekrai-0.1.0 → seekrai-0.2.0}/src/seekrai/utils/tools.py +0 -0
  40. {seekrai-0.1.0 → seekrai-0.2.0}/src/seekrai/version.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: seekrai
3
- Version: 0.1.0
3
+ Version: 0.2.0
4
4
  Summary: Python client for SeekrAI
5
5
  Home-page: https://gitlab.cb.ntent.com/ml/seekr-py
6
6
  License: Apache-2.0
@@ -17,7 +17,7 @@ Classifier: Programming Language :: Python :: 3.12
17
17
  Requires-Dist: click (>=8.1.7,<9.0.0)
18
18
  Requires-Dist: eval-type-backport (>=0.1.3,<0.3.0)
19
19
  Requires-Dist: filelock (>=3.13.1,<4.0.0)
20
- Requires-Dist: httpx (>=0.27.0,<0.28.0)
20
+ Requires-Dist: httpx[http2] (>=0.27.0,<0.28.0)
21
21
  Requires-Dist: numpy (>=1.23.5) ; python_version < "3.12"
22
22
  Requires-Dist: numpy (>=1.26.0) ; python_version >= "3.12"
23
23
  Requires-Dist: pillow (>=10.3.0,<11.0.0)
@@ -59,8 +59,6 @@ from seekrai import SeekrFlow
59
59
  client = SeekrFlow(api_key="xxxxx")
60
60
  ```
61
61
 
62
- This library contains both a python library and a CLI. We'll demonstrate how to use both below.
63
-
64
62
  # Usage – Python Client
65
63
 
66
64
  ## Chat Completions
@@ -128,7 +126,7 @@ asyncio.run(async_chat_completion(messages))
128
126
 
129
127
  ## Files
130
128
 
131
- The files API is used for fine-tuning and allows developers to upload data to fine-tune on. It also has several methods to list all files, retrive files, and delete files. Please refer to our fine-tuning docs [here](https://docs.seekrflow.ai/docs/fine-tuning-python).
129
+ The files API is used for fine-tuning and allows developers to upload data to fine-tune on. It also has several methods to list all files, retrieve files, and delete files
132
130
 
133
131
  ```python
134
132
  import os
@@ -136,15 +134,14 @@ from seekrai import SeekrFlow
136
134
 
137
135
  client = SeekrFlow(api_key=os.environ.get("SEEKR_API_KEY"))
138
136
 
139
- client.files.upload(file="somedata.jsonl") # uploads a file
137
+ client.files.upload(file="somedata.parquet") # uploads a file
140
138
  client.files.list() # lists all uploaded files
141
- client.files.retrieve(id="file-d0d318cb-b7d9-493a-bd70-1cfe089d3815") # retrieves a specific file
142
139
  client.files.delete(id="file-d0d318cb-b7d9-493a-bd70-1cfe089d3815") # deletes a file
143
140
  ```
144
141
 
145
142
  ## Fine-tunes
146
143
 
147
- The finetune API is used for fine-tuning and allows developers to create finetuning jobs. It also has several methods to list all jobs, retrive statuses and get checkpoints. Please refer to our fine-tuning docs [here](https://docs.seekrflow.ai/docs/fine-tuning-python).
144
+ The finetune API is used for fine-tuning and allows developers to create finetuning jobs. It also has several methods to list all jobs, retrieve statuses and get checkpoints.
148
145
 
149
146
  ```python
150
147
  import os
@@ -160,10 +157,8 @@ client.fine_tuning.create(
160
157
  batch_size=4,
161
158
  learning_rate=1e-5,
162
159
  suffix='my-demo-finetune',
163
- wandb_api_key='1a2b3c4d5e.......',
164
160
  )
165
161
  client.fine_tuning.list() # lists all fine-tuned jobs
166
162
  client.fine_tuning.retrieve(id="ft-c66a5c18-1d6d-43c9-94bd-32d756425b4b") # retrieves information on finetune event
167
- client.fine_tuning.list_events(id="ft-c66a5c18-1d6d-43c9-94bd-32d756425b4b") # Lists events of a fine-tune job
168
163
  ```
169
164
 
@@ -26,8 +26,6 @@ from seekrai import SeekrFlow
26
26
  client = SeekrFlow(api_key="xxxxx")
27
27
  ```
28
28
 
29
- This library contains both a python library and a CLI. We'll demonstrate how to use both below.
30
-
31
29
  # Usage – Python Client
32
30
 
33
31
  ## Chat Completions
@@ -95,7 +93,7 @@ asyncio.run(async_chat_completion(messages))
95
93
 
96
94
  ## Files
97
95
 
98
- The files API is used for fine-tuning and allows developers to upload data to fine-tune on. It also has several methods to list all files, retrive files, and delete files. Please refer to our fine-tuning docs [here](https://docs.seekrflow.ai/docs/fine-tuning-python).
96
+ The files API is used for fine-tuning and allows developers to upload data to fine-tune on. It also has several methods to list all files, retrieve files, and delete files
99
97
 
100
98
  ```python
101
99
  import os
@@ -103,15 +101,14 @@ from seekrai import SeekrFlow
103
101
 
104
102
  client = SeekrFlow(api_key=os.environ.get("SEEKR_API_KEY"))
105
103
 
106
- client.files.upload(file="somedata.jsonl") # uploads a file
104
+ client.files.upload(file="somedata.parquet") # uploads a file
107
105
  client.files.list() # lists all uploaded files
108
- client.files.retrieve(id="file-d0d318cb-b7d9-493a-bd70-1cfe089d3815") # retrieves a specific file
109
106
  client.files.delete(id="file-d0d318cb-b7d9-493a-bd70-1cfe089d3815") # deletes a file
110
107
  ```
111
108
 
112
109
  ## Fine-tunes
113
110
 
114
- The finetune API is used for fine-tuning and allows developers to create finetuning jobs. It also has several methods to list all jobs, retrive statuses and get checkpoints. Please refer to our fine-tuning docs [here](https://docs.seekrflow.ai/docs/fine-tuning-python).
111
+ The finetune API is used for fine-tuning and allows developers to create finetuning jobs. It also has several methods to list all jobs, retrieve statuses and get checkpoints.
115
112
 
116
113
  ```python
117
114
  import os
@@ -127,9 +124,7 @@ client.fine_tuning.create(
127
124
  batch_size=4,
128
125
  learning_rate=1e-5,
129
126
  suffix='my-demo-finetune',
130
- wandb_api_key='1a2b3c4d5e.......',
131
127
  )
132
128
  client.fine_tuning.list() # lists all fine-tuned jobs
133
129
  client.fine_tuning.retrieve(id="ft-c66a5c18-1d6d-43c9-94bd-32d756425b4b") # retrieves information on finetune event
134
- client.fine_tuning.list_events(id="ft-c66a5c18-1d6d-43c9-94bd-32d756425b4b") # Lists events of a fine-tune job
135
130
  ```
@@ -14,7 +14,7 @@ build-backend = "poetry.core.masonry.api"
14
14
 
15
15
  [tool.poetry]
16
16
  name = "seekrai"
17
- version = "0.1.0"
17
+ version = "0.2.0"
18
18
  authors = [
19
19
  "SeekrFlow <support@seekr.com>"
20
20
  ]
@@ -46,7 +46,7 @@ numpy = [
46
46
  { version = ">=1.23.5", python = "<3.12" },
47
47
  { version = ">=1.26.0", python = ">=3.12" },
48
48
  ]
49
- httpx = "^0.27.0"
49
+ httpx = {extras = ["http2"], version = "^0.27.0"}
50
50
 
51
51
  [tool.poetry.group.quality]
52
52
  optional = true
@@ -519,6 +519,9 @@ class APIRequestor:
519
519
  if "text/plain" in rheaders.get("Content-Type", ""):
520
520
  data: Dict[str, Any] = {"message": rbody}
521
521
  else:
522
+ if rbody.strip().endswith("[DONE]"):
523
+ # TODO
524
+ rbody = rbody.replace("data: [DONE]", "")
522
525
  data = json.loads(rbody)
523
526
  except (JSONDecodeError, UnicodeDecodeError) as e:
524
527
  raise error.APIError(
@@ -18,6 +18,7 @@ class SeekrFlow:
18
18
  images: resources.Images
19
19
  models: resources.Models
20
20
  fine_tuning: resources.FineTuning
21
+ alignment: resources.Alignment
21
22
 
22
23
  # client options
23
24
  client: SeekrFlowClient
@@ -77,6 +78,7 @@ class SeekrFlow:
77
78
  self.images = resources.Images(self.client)
78
79
  self.models = resources.Models(self.client)
79
80
  self.fine_tuning = resources.FineTuning(self.client)
81
+ self.alignment = resources.Alignment(self.client)
80
82
 
81
83
 
82
84
  class AsyncSeekrFlow:
@@ -87,6 +89,7 @@ class AsyncSeekrFlow:
87
89
  images: resources.AsyncImages
88
90
  models: resources.AsyncModels
89
91
  fine_tuning: resources.AsyncFineTuning
92
+ alignment: resources.AsyncAlignment
90
93
 
91
94
  # client options
92
95
  client: SeekrFlowClient
@@ -146,6 +149,7 @@ class AsyncSeekrFlow:
146
149
  self.images = resources.AsyncImages(self.client)
147
150
  self.models = resources.AsyncModels(self.client)
148
151
  self.fine_tuning = resources.AsyncFineTuning(self.client)
152
+ self.alignment = resources.AsyncAlignment(self.client)
149
153
 
150
154
 
151
155
  Client = SeekrFlow
@@ -19,7 +19,7 @@ INITIAL_RETRY_DELAY = float(env_or_default("INITIAL_RETRY_DELAY", 0.5))
19
19
  MAX_RETRY_DELAY = float(env_or_default("MAX_RETRY_DELAY", 8.0))
20
20
 
21
21
  # API defaults
22
- BASE_URL = env_or_default("BASE_URL", "https://build.seekr.com/v1")
22
+ BASE_URL = env_or_default("BASE_URL", "https://flow.seekr.com/v1")
23
23
 
24
24
  # Download defaults
25
25
  DOWNLOAD_BLOCK_SIZE = int(
@@ -12,6 +12,7 @@ import httpx
12
12
  import requests
13
13
  from requests.structures import CaseInsensitiveDict
14
14
  from tqdm import tqdm
15
+ from tqdm.utils import CallbackIOWrapper
15
16
 
16
17
  import seekrai.utils
17
18
  from seekrai.abstract import api_requestor
@@ -42,15 +43,15 @@ def chmod_and_replace(src: Path, dst: Path) -> None:
42
43
  # Get umask by creating a temporary file in the cache folder.
43
44
  tmp_file = dst.parent / f"tmp_{uuid.uuid4()}"
44
45
 
45
- try:
46
- tmp_file.touch()
46
+ # try:
47
+ tmp_file.touch()
47
48
 
48
- cache_dir_mode = Path(tmp_file).stat().st_mode
49
+ cache_dir_mode = Path(tmp_file).stat().st_mode
49
50
 
50
- os.chmod(src.as_posix(), stat.S_IMODE(cache_dir_mode))
51
+ os.chmod(src.as_posix(), stat.S_IMODE(cache_dir_mode))
51
52
 
52
- finally:
53
- tmp_file.unlink()
53
+ # finally:
54
+ # tmp_file.unlink()
54
55
 
55
56
  shutil.move(src.as_posix(), dst.as_posix())
56
57
 
@@ -186,10 +187,7 @@ class DownloadManager:
186
187
  url, output, remote_name, fetch_metadata
187
188
  )
188
189
 
189
- # Prevent parallel downloads of the same file with a lock.
190
- lock_path = Path(file_path.as_posix() + ".lock")
191
-
192
- with tempfile.NamedTemporaryFile() as temp_file:
190
+ with tempfile.NamedTemporaryFile(delete=False) as temp_file:
193
191
  response = requestor.request_raw(
194
192
  options=SeekrFlowRequest(
195
193
  method="GET",
@@ -201,7 +199,6 @@ class DownloadManager:
201
199
  try:
202
200
  response.raise_for_status()
203
201
  except Exception as e:
204
- os.remove(lock_path)
205
202
  raise APIError(
206
203
  "Error downloading file", http_status=response.status_code
207
204
  ) from e
@@ -234,8 +231,6 @@ class DownloadManager:
234
231
  # Moves temp file to output file path
235
232
  chmod_and_replace(Path(temp_file.name), file_path)
236
233
 
237
- os.remove(lock_path)
238
-
239
234
  return str(file_path.resolve()), file_size
240
235
 
241
236
 
@@ -323,13 +318,6 @@ class UploadManager:
323
318
  client=self._client,
324
319
  )
325
320
 
326
- if redirect:
327
- if file.suffix not in [".jsonl", ".parquet", ".pt"]:
328
- raise FileTypeError(
329
- f"Unknown extension of file {file}. "
330
- "Only files with extensions .jsonl, .parquet, and .pt are supported."
331
- )
332
-
333
321
  file_size = os.stat(file.as_posix()).st_size
334
322
 
335
323
  with tqdm(
@@ -338,13 +326,14 @@ class UploadManager:
338
326
  unit_scale=True,
339
327
  desc=f"Uploading file {file.name}",
340
328
  disable=bool(DISABLE_TQDM),
341
- ):
329
+ ) as t:
342
330
  with file.open("rb") as f:
331
+ reader_wrapper = CallbackIOWrapper(t.update, f, "read")
343
332
  response, _, _ = requestor.request(
344
333
  options=SeekrFlowRequest(
345
334
  method="PUT",
346
335
  url=url,
347
- files={"files": f, "filename": file.name},
336
+ files={"files": reader_wrapper, "filename": file.name},
348
337
  params={"purpose": purpose.value},
349
338
  ),
350
339
  )
@@ -1,3 +1,4 @@
1
+ from seekrai.resources.alignment import Alignment, AsyncAlignment
1
2
  from seekrai.resources.chat import AsyncChat, Chat
2
3
  from seekrai.resources.completions import AsyncCompletions, Completions
3
4
  from seekrai.resources.embeddings import AsyncEmbeddings, Embeddings
@@ -8,6 +9,8 @@ from seekrai.resources.models import AsyncModels, Models
8
9
 
9
10
 
10
11
  __all__ = [
12
+ "AsyncAlignment",
13
+ "Alignment",
11
14
  "AsyncCompletions",
12
15
  "Completions",
13
16
  "AsyncChat",
@@ -0,0 +1,175 @@
1
+ from typing import List
2
+
3
+ from seekrai.abstract import api_requestor
4
+ from seekrai.seekrflow_response import SeekrFlowResponse
5
+ from seekrai.types import (
6
+ AlignmentList,
7
+ AlignmentRequest,
8
+ AlignmentResponse,
9
+ SeekrFlowClient,
10
+ SeekrFlowRequest,
11
+ )
12
+
13
+
14
+ class Alignment:
15
+ def __init__(self, client: SeekrFlowClient) -> None:
16
+ self._client = client
17
+
18
+ def generate(
19
+ self,
20
+ instructions: str,
21
+ files: List[str],
22
+ ) -> AlignmentResponse:
23
+ requestor = api_requestor.APIRequestor(
24
+ client=self._client,
25
+ )
26
+
27
+ parameter_payload = AlignmentRequest(
28
+ instructions=instructions,
29
+ files=files,
30
+ ).model_dump()
31
+
32
+ response, _, _ = requestor.request(
33
+ options=SeekrFlowRequest(
34
+ method="POST",
35
+ url="flow/alignment/generate",
36
+ params=parameter_payload,
37
+ ),
38
+ stream=False,
39
+ )
40
+
41
+ assert isinstance(response, SeekrFlowResponse)
42
+ return AlignmentResponse(**response.data)
43
+
44
+ def list(self) -> AlignmentList:
45
+ """
46
+ Lists alignment job history
47
+
48
+ Returns:
49
+ AlignmentList: Object containing a list of alignment jobs
50
+ """
51
+
52
+ requestor = api_requestor.APIRequestor(
53
+ client=self._client,
54
+ )
55
+
56
+ response, _, _ = requestor.request(
57
+ options=SeekrFlowRequest(
58
+ method="GET",
59
+ url="flow/alignment",
60
+ ),
61
+ stream=False,
62
+ )
63
+
64
+ assert isinstance(response, SeekrFlowResponse)
65
+
66
+ return AlignmentList(**response.data)
67
+
68
+ def retrieve(self, id: str) -> AlignmentResponse:
69
+ """
70
+ Retrieves alignment job details
71
+
72
+ Args:
73
+ id (str): Alignment job ID to retrieve.
74
+
75
+ Returns:
76
+ AlignmentResponse: Object containing information about alignment job.
77
+ """
78
+
79
+ requestor = api_requestor.APIRequestor(
80
+ client=self._client,
81
+ )
82
+
83
+ response, _, _ = requestor.request(
84
+ options=SeekrFlowRequest(
85
+ method="GET",
86
+ url=f"flow/alignment/{id}",
87
+ ),
88
+ stream=False,
89
+ )
90
+
91
+ assert isinstance(response, SeekrFlowResponse)
92
+
93
+ return AlignmentResponse(**response.data)
94
+
95
+
96
+ class AsyncAlignment:
97
+ def __init__(self, client: SeekrFlowClient) -> None:
98
+ self._client = client
99
+
100
+ async def generate(
101
+ self,
102
+ instructions: str,
103
+ files: List[str],
104
+ ) -> AlignmentResponse:
105
+ requestor = api_requestor.APIRequestor(
106
+ client=self._client,
107
+ )
108
+
109
+ parameter_payload = AlignmentRequest(
110
+ instructions=instructions,
111
+ files=files,
112
+ ).model_dump()
113
+
114
+ response, _, _ = await requestor.arequest(
115
+ options=SeekrFlowRequest(
116
+ method="POST",
117
+ url="flow/alignment/generate",
118
+ params=parameter_payload,
119
+ ),
120
+ stream=False,
121
+ )
122
+
123
+ assert isinstance(response, SeekrFlowResponse)
124
+ return AlignmentResponse(**response.data)
125
+
126
+ async def list(self) -> AlignmentList:
127
+ """
128
+ Lists alignment job history
129
+
130
+ Returns:
131
+ AlignmentList: Object containing a list of alignment jobs
132
+ """
133
+
134
+ requestor = api_requestor.APIRequestor(
135
+ client=self._client,
136
+ )
137
+
138
+ response, _, _ = await requestor.arequest(
139
+ options=SeekrFlowRequest(
140
+ method="GET",
141
+ url="flow/alignment",
142
+ ),
143
+ stream=False,
144
+ )
145
+
146
+ assert isinstance(response, SeekrFlowResponse)
147
+
148
+ return AlignmentList(**response.data)
149
+
150
+ async def retrieve(self, id: str) -> AlignmentResponse:
151
+ """
152
+ Retrieves alignment job details
153
+
154
+ Args:
155
+ id (str): Alignment job ID to retrieve.
156
+
157
+ Returns:
158
+ AlignmentResponse: Object containing information about alignment job.
159
+ """
160
+
161
+ requestor = api_requestor.APIRequestor(
162
+ client=self._client,
163
+ )
164
+
165
+ response, _, _ = await requestor.arequest(
166
+ options=SeekrFlowRequest(
167
+ method="GET",
168
+ url=f"flow/alignment/{id}",
169
+ ),
170
+ stream=False,
171
+ )
172
+
173
+ assert isinstance(response, SeekrFlowResponse)
174
+
175
+ return AlignmentResponse(**response.data)
@@ -1,6 +1,7 @@
1
1
  from __future__ import annotations
2
2
 
3
3
  from pathlib import Path
4
+ from typing import Any, Dict
4
5
 
5
6
  from seekrai.abstract import api_requestor
6
7
  from seekrai.filemanager import DownloadManager, UploadManager
@@ -14,6 +15,10 @@ from seekrai.types import (
14
15
  SeekrFlowClient,
15
16
  SeekrFlowRequest,
16
17
  )
18
+ from seekrai.types.files import (
19
+ AlignFileMetadataValidationReq,
20
+ AlignFileMetadataValidationResp,
21
+ )
17
22
  from seekrai.utils import normalize_key
18
23
 
19
24
 
@@ -21,11 +26,18 @@ class Files:
21
26
  def __init__(self, client: SeekrFlowClient) -> None:
22
27
  self._client = client
23
28
 
29
+ def _get_local_file_metadata(self, file_path: Path) -> Dict[str, Any]:
30
+ suffix = file_path.suffix.lstrip(".")
31
+ size_bytes = int(file_path.stat().st_size)
32
+ return {
33
+ "suffix": suffix,
34
+ "size_bytes": size_bytes,
35
+ "filename": file_path.name,
36
+ }
37
+
24
38
  def upload(
25
39
  self, file: Path | str, *, purpose: FilePurpose | str = FilePurpose.FineTune
26
40
  ) -> FileResponse:
27
- upload_manager = UploadManager(self._client)
28
-
29
41
  if isinstance(file, str):
30
42
  file = Path(file)
31
43
 
@@ -34,7 +46,30 @@ class Files:
34
46
 
35
47
  assert isinstance(purpose, FilePurpose)
36
48
 
37
- return upload_manager.upload("flow/files", file, purpose=purpose, redirect=True)
49
+ # Do the metadata validation (fail fast before uploading) for Alignment purpose
50
+ if purpose == FilePurpose.Alignment:
51
+ file_metadata = self._get_local_file_metadata(file)
52
+ suffix = file_metadata["suffix"]
53
+ size = file_metadata["size_bytes"]
54
+ metadata_validation = self.validate_align_file_metadata(
55
+ purpose,
56
+ suffix,
57
+ size,
58
+ )
59
+
60
+ if not metadata_validation.is_valid:
61
+ assert metadata_validation.errors is not None # To appease linter
62
+ raise ValueError(
63
+ f"Alignment file metadata validation failed: {metadata_validation.errors}"
64
+ )
65
+
66
+ # Upload the file to s3
67
+ upload_manager = UploadManager(self._client)
68
+ file_response = upload_manager.upload(
69
+ "flow/files", file, purpose=purpose, redirect=True
70
+ )
71
+
72
+ return file_response
38
73
 
39
74
  def list(self) -> FileList:
40
75
  requestor = api_requestor.APIRequestor(
@@ -87,7 +122,7 @@ class Files:
87
122
  output = Path(output)
88
123
 
89
124
  downloaded_filename, file_size = download_manager.download(
90
- f"flow/files/{id}/content", output, normalize_key(f"{id}.jsonl")
125
+ f"flow/files/{id}/content", output, normalize_key(id)
91
126
  )
92
127
 
93
128
  return FileObject(
@@ -114,6 +149,31 @@ class Files:
114
149
 
115
150
  return FileDeleteResponse(**response.data)
116
151
 
152
+ def validate_align_file_metadata(
153
+ self,
154
+ purpose: FilePurpose,
155
+ suffix: str,
156
+ size: int,
157
+ ) -> AlignFileMetadataValidationResp:
158
+ requestor = api_requestor.APIRequestor(client=self._client)
159
+
160
+ request_body = AlignFileMetadataValidationReq(
161
+ purpose=purpose,
162
+ suffix=suffix,
163
+ size=size,
164
+ )
165
+
166
+ response, _, _ = requestor.request(
167
+ options=SeekrFlowRequest(
168
+ method="POST",
169
+ url="flow/files/validate_metadata",
170
+ params=request_body.dict(),
171
+ ),
172
+ stream=False,
173
+ )
174
+
175
+ return AlignFileMetadataValidationResp(**response.data)
176
+
117
177
 
118
178
  class AsyncFiles:
119
179
  def __init__(self, client: SeekrFlowClient) -> None:
@@ -5,6 +5,7 @@ from pathlib import Path
5
5
  from typing import Any, List
6
6
 
7
7
  from tqdm import tqdm
8
+ from tqdm.utils import CallbackIOWrapper
8
9
 
9
10
  from seekrai.abstract import api_requestor
10
11
  from seekrai.constants import DISABLE_TQDM
@@ -35,15 +36,16 @@ class Models:
35
36
  total=file_size,
36
37
  unit="B",
37
38
  unit_scale=True,
38
- desc=f"Uploading file {file.name}",
39
+ desc=f"Uploading model file {file.name}",
39
40
  disable=bool(DISABLE_TQDM),
40
- ):
41
+ ) as t:
41
42
  with file.open("rb") as f:
43
+ reader_wrapper = CallbackIOWrapper(t.update, f, "read")
42
44
  response, _, _ = requestor.request(
43
45
  options=SeekrFlowRequest(
44
46
  method="PUT",
45
47
  url="flow/pt-models",
46
- files={"files": f, "filename": file.name},
48
+ files={"files": reader_wrapper, "filename": file.name},
47
49
  params={"purpose": model_type},
48
50
  ),
49
51
  )
@@ -1,4 +1,10 @@
1
1
  from seekrai.types.abstract import SeekrFlowClient
2
+ from seekrai.types.alignment import (
3
+ AlignmentJobStatus,
4
+ AlignmentList,
5
+ AlignmentRequest,
6
+ AlignmentResponse,
7
+ )
2
8
  from seekrai.types.chat_completions import (
3
9
  ChatCompletionChunk,
4
10
  ChatCompletionRequest,
@@ -65,4 +71,8 @@ __all__ = [
65
71
  "ImageResponse",
66
72
  "ModelResponse",
67
73
  "ModelList",
74
+ "AlignmentRequest",
75
+ "AlignmentResponse",
76
+ "AlignmentJobStatus",
77
+ "AlignmentList",
68
78
  ]
@@ -0,0 +1,39 @@
1
+ from datetime import datetime
2
+ from enum import Enum
3
+ from typing import List, Literal, Optional
4
+
5
+ from pydantic import Field
6
+
7
+ from seekrai.types.abstract import BaseModel
8
+
9
+
10
+ class AlignmentRequest(BaseModel):
11
+ instructions: str = Field(
12
+ default=..., description="Task description/instructions for the alignment task"
13
+ )
14
+ files: List[str] = Field(
15
+ default=..., description="List of file ids to use for alignment"
16
+ )
17
+
18
+
19
+ class AlignmentJobStatus(str, Enum):
20
+ STATUS_PENDING = "pending"
21
+ STATUS_QUEUED = "queued"
22
+ STATUS_RUNNING = "running"
23
+ STATUS_CANCEL_REQUESTED = "cancel_requested"
24
+ STATUS_CANCELLED = "cancelled"
25
+ STATUS_FAILED = "failed"
26
+ STATUS_COMPLETED = "completed"
27
+
28
+
29
+ class AlignmentResponse(BaseModel):
30
+ id: Optional[str] = Field(default=..., description="Alignment job ID")
31
+ created_at: datetime | None = None
32
+ status: AlignmentJobStatus | None = None
33
+
34
+
35
+ class AlignmentList(BaseModel):
36
+ # object type
37
+ object: Literal["list"] | None = None
38
+ # list of fine-tune job objects
39
+ data: List[AlignmentResponse] | None = None
@@ -2,7 +2,7 @@ from __future__ import annotations
2
2
 
3
3
  from datetime import datetime
4
4
  from enum import Enum
5
- from typing import List, Literal
5
+ from typing import List, Literal, Optional, Union
6
6
 
7
7
  from seekrai.types.abstract import BaseModel
8
8
  from seekrai.types.common import (
@@ -13,12 +13,42 @@ from seekrai.types.common import (
13
13
  class FilePurpose(str, Enum):
14
14
  FineTune = "fine-tune"
15
15
  PreTrain = "pre-train"
16
+ Alignment = "alignment"
16
17
 
17
18
 
18
- class FileType(str, Enum):
19
+ class TrainingFileType(str, Enum):
19
20
  jsonl = "jsonl"
20
21
  parquet = "parquet"
21
- pytorch = "pt"
22
+ pytorch = "pt" # TODO - this doesnt belong here
23
+
24
+
25
+ class AlignmentFileType(str, Enum):
26
+ HTML = "html"
27
+ MD = "md"
28
+ RST = "rst"
29
+ RTF = "rtf"
30
+ TXT = "txt"
31
+ XML = "xml"
32
+ JSON = "json"
33
+ JSONL = "jsonl"
34
+ CSV = "csv"
35
+ DOC = "doc"
36
+ DOCX = "docx"
37
+ PDF = "pdf"
38
+
39
+
40
+ FileType = Union[TrainingFileType, AlignmentFileType]
41
+
42
+
43
+ class AlignFileMetadataValidationReq(BaseModel):
44
+ purpose: str
45
+ suffix: str
46
+ size: int
47
+
48
+
49
+ class AlignFileMetadataValidationResp(BaseModel):
50
+ is_valid: bool
51
+ errors: Optional[str] = None
22
52
 
23
53
 
24
54
  class FileRequest(BaseModel):
@@ -61,8 +91,7 @@ class FileResponse(BaseModel):
61
91
  filename: str | None = None
62
92
  # file byte size
63
93
  bytes: int | None = None
64
- # JSONL/Parquet line count
65
- line_count: int | None = None
94
+ created_by: str | None = None # TODO - fix this later
66
95
 
67
96
 
68
97
  class FileList(BaseModel):
File without changes
File without changes
File without changes
File without changes