PyPI - seekrai - Versions diffs - 0.1.0__tar.gz → 0.2.0__tar.gz - Mend

seekrai 0.1.0.tar.gz → 0.2.0.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (40) hide show

{seekrai-0.1.0 → seekrai-0.2.0}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: seekrai
-Version: 0.1.0
+Version: 0.2.0
 Summary: Python client for SeekrAI
 Home-page: https://gitlab.cb.ntent.com/ml/seekr-py
 License: Apache-2.0
@@ -17,7 +17,7 @@ Classifier: Programming Language :: Python :: 3.12
 Requires-Dist: click (>=8.1.7,<9.0.0)
 Requires-Dist: eval-type-backport (>=0.1.3,<0.3.0)
 Requires-Dist: filelock (>=3.13.1,<4.0.0)
-Requires-Dist: httpx (>=0.27.0,<0.28.0)
+Requires-Dist: httpx[http2] (>=0.27.0,<0.28.0)
 Requires-Dist: numpy (>=1.23.5) ; python_version < "3.12"
 Requires-Dist: numpy (>=1.26.0) ; python_version >= "3.12"
 Requires-Dist: pillow (>=10.3.0,<11.0.0)
@@ -59,8 +59,6 @@ from seekrai import SeekrFlow
 client = SeekrFlow(api_key="xxxxx")
 ```
-This library contains both a python library and a CLI. We'll demonstrate how to use both below.
 # Usage – Python Client
 ## Chat Completions
@@ -128,7 +126,7 @@ asyncio.run(async_chat_completion(messages))
 ## Files
-The files API is used for fine-tuning and allows developers to upload data to fine-tune on. It also has several methods to list all files, retrive files, and delete files. Please refer to our fine-tuning docs [here](https://docs.seekrflow.ai/docs/fine-tuning-python).
+The files API is used for fine-tuning and allows developers to upload data to fine-tune on. It also has several methods to list all files, retrieve files, and delete files
 ```python
 import os
@@ -136,15 +134,14 @@ from seekrai import SeekrFlow
 client = SeekrFlow(api_key=os.environ.get("SEEKR_API_KEY"))
-client.files.upload(file="somedata.jsonl")  # uploads a file
+client.files.upload(file="somedata.parquet")  # uploads a file
 client.files.list()  # lists all uploaded files
-client.files.retrieve(id="file-d0d318cb-b7d9-493a-bd70-1cfe089d3815")  # retrieves a specific file
 client.files.delete(id="file-d0d318cb-b7d9-493a-bd70-1cfe089d3815")  # deletes a file
 ```
 ## Fine-tunes
-The finetune API is used for fine-tuning and allows developers to create finetuning jobs. It also has several methods to list all jobs, retrive statuses and get checkpoints. Please refer to our fine-tuning docs [here](https://docs.seekrflow.ai/docs/fine-tuning-python).
+The finetune API is used for fine-tuning and allows developers to create finetuning jobs. It also has several methods to list all jobs, retrieve statuses and get checkpoints.
 ```python
 import os
@@ -160,10 +157,8 @@ client.fine_tuning.create(
     batch_size=4,
     learning_rate=1e-5,
     suffix='my-demo-finetune',
-    wandb_api_key='1a2b3c4d5e.......',
 )
 client.fine_tuning.list()  # lists all fine-tuned jobs
 client.fine_tuning.retrieve(id="ft-c66a5c18-1d6d-43c9-94bd-32d756425b4b")  # retrieves information on finetune event
-client.fine_tuning.list_events(id="ft-c66a5c18-1d6d-43c9-94bd-32d756425b4b")  # Lists events of a fine-tune job
 ```

{seekrai-0.1.0 → seekrai-0.2.0}/README.md RENAMED Viewed

@@ -26,8 +26,6 @@ from seekrai import SeekrFlow
 client = SeekrFlow(api_key="xxxxx")
 ```
-This library contains both a python library and a CLI. We'll demonstrate how to use both below.
 # Usage – Python Client
 ## Chat Completions
@@ -95,7 +93,7 @@ asyncio.run(async_chat_completion(messages))
 ## Files
-The files API is used for fine-tuning and allows developers to upload data to fine-tune on. It also has several methods to list all files, retrive files, and delete files. Please refer to our fine-tuning docs [here](https://docs.seekrflow.ai/docs/fine-tuning-python).
+The files API is used for fine-tuning and allows developers to upload data to fine-tune on. It also has several methods to list all files, retrieve files, and delete files
 ```python
 import os
@@ -103,15 +101,14 @@ from seekrai import SeekrFlow
 client = SeekrFlow(api_key=os.environ.get("SEEKR_API_KEY"))
-client.files.upload(file="somedata.jsonl")  # uploads a file
+client.files.upload(file="somedata.parquet")  # uploads a file
 client.files.list()  # lists all uploaded files
-client.files.retrieve(id="file-d0d318cb-b7d9-493a-bd70-1cfe089d3815")  # retrieves a specific file
 client.files.delete(id="file-d0d318cb-b7d9-493a-bd70-1cfe089d3815")  # deletes a file
 ```
 ## Fine-tunes
-The finetune API is used for fine-tuning and allows developers to create finetuning jobs. It also has several methods to list all jobs, retrive statuses and get checkpoints. Please refer to our fine-tuning docs [here](https://docs.seekrflow.ai/docs/fine-tuning-python).
+The finetune API is used for fine-tuning and allows developers to create finetuning jobs. It also has several methods to list all jobs, retrieve statuses and get checkpoints.
 ```python
 import os
@@ -127,9 +124,7 @@ client.fine_tuning.create(
     batch_size=4,
     learning_rate=1e-5,
     suffix='my-demo-finetune',
-    wandb_api_key='1a2b3c4d5e.......',
 )
 client.fine_tuning.list()  # lists all fine-tuned jobs
 client.fine_tuning.retrieve(id="ft-c66a5c18-1d6d-43c9-94bd-32d756425b4b")  # retrieves information on finetune event
-client.fine_tuning.list_events(id="ft-c66a5c18-1d6d-43c9-94bd-32d756425b4b")  # Lists events of a fine-tune job
 ```

{seekrai-0.1.0 → seekrai-0.2.0}/pyproject.toml RENAMED Viewed

@@ -14,7 +14,7 @@ build-backend = "poetry.core.masonry.api"
 [tool.poetry]
 name = "seekrai"
-version = "0.1.0"
+version = "0.2.0"
 authors = [
     "SeekrFlow <support@seekr.com>"
 ]
@@ -46,7 +46,7 @@ numpy = [
     { version = ">=1.23.5", python = "<3.12" },
     { version = ">=1.26.0", python = ">=3.12" },
 ]
-httpx = "^0.27.0"
+httpx = {extras = ["http2"], version = "^0.27.0"}
 [tool.poetry.group.quality]
 optional = true

{seekrai-0.1.0 → seekrai-0.2.0}/src/seekrai/abstract/api_requestor.py RENAMED Viewed

@@ -519,6 +519,9 @@ class APIRequestor:
             if "text/plain" in rheaders.get("Content-Type", ""):
                 data: Dict[str, Any] = {"message": rbody}
             else:
+                if rbody.strip().endswith("[DONE]"):
+                    # TODO
+                    rbody = rbody.replace("data: [DONE]", "")
                 data = json.loads(rbody)
         except (JSONDecodeError, UnicodeDecodeError) as e:
             raise error.APIError(

{seekrai-0.1.0 → seekrai-0.2.0}/src/seekrai/client.py RENAMED Viewed

@@ -18,6 +18,7 @@ class SeekrFlow:
     images: resources.Images
     models: resources.Models
     fine_tuning: resources.FineTuning
+    alignment: resources.Alignment
     # client options
     client: SeekrFlowClient
@@ -77,6 +78,7 @@ class SeekrFlow:
         self.images = resources.Images(self.client)
         self.models = resources.Models(self.client)
         self.fine_tuning = resources.FineTuning(self.client)
+        self.alignment = resources.Alignment(self.client)
 class AsyncSeekrFlow:
@@ -87,6 +89,7 @@ class AsyncSeekrFlow:
     images: resources.AsyncImages
     models: resources.AsyncModels
     fine_tuning: resources.AsyncFineTuning
+    alignment: resources.AsyncAlignment
     # client options
     client: SeekrFlowClient
@@ -146,6 +149,7 @@ class AsyncSeekrFlow:
         self.images = resources.AsyncImages(self.client)
         self.models = resources.AsyncModels(self.client)
         self.fine_tuning = resources.AsyncFineTuning(self.client)
+        self.alignment = resources.AsyncAlignment(self.client)
 Client = SeekrFlow

{seekrai-0.1.0 → seekrai-0.2.0}/src/seekrai/constants.py RENAMED Viewed

@@ -19,7 +19,7 @@ INITIAL_RETRY_DELAY = float(env_or_default("INITIAL_RETRY_DELAY", 0.5))
 MAX_RETRY_DELAY = float(env_or_default("MAX_RETRY_DELAY", 8.0))
 # API defaults
-BASE_URL = env_or_default("BASE_URL", "https://build.seekr.com/v1")
+BASE_URL = env_or_default("BASE_URL", "https://flow.seekr.com/v1")
 # Download defaults
 DOWNLOAD_BLOCK_SIZE = int(

{seekrai-0.1.0 → seekrai-0.2.0}/src/seekrai/filemanager.py RENAMED Viewed

@@ -12,6 +12,7 @@ import httpx
 import requests
 from requests.structures import CaseInsensitiveDict
 from tqdm import tqdm
+from tqdm.utils import CallbackIOWrapper
 import seekrai.utils
 from seekrai.abstract import api_requestor
@@ -42,15 +43,15 @@ def chmod_and_replace(src: Path, dst: Path) -> None:
     # Get umask by creating a temporary file in the cache folder.
     tmp_file = dst.parent / f"tmp_{uuid.uuid4()}"
-    try:
-        tmp_file.touch()
+    # try:
+    tmp_file.touch()
-        cache_dir_mode = Path(tmp_file).stat().st_mode
+    cache_dir_mode = Path(tmp_file).stat().st_mode
-        os.chmod(src.as_posix(), stat.S_IMODE(cache_dir_mode))
+    os.chmod(src.as_posix(), stat.S_IMODE(cache_dir_mode))
-    finally:
-        tmp_file.unlink()
+    # finally:
+    #     tmp_file.unlink()
     shutil.move(src.as_posix(), dst.as_posix())
@@ -186,10 +187,7 @@ class DownloadManager:
             url, output, remote_name, fetch_metadata
         )
-        # Prevent parallel downloads of the same file with a lock.
-        lock_path = Path(file_path.as_posix() + ".lock")
-        with tempfile.NamedTemporaryFile() as temp_file:
+        with tempfile.NamedTemporaryFile(delete=False) as temp_file:
             response = requestor.request_raw(
                 options=SeekrFlowRequest(
                     method="GET",
@@ -201,7 +199,6 @@ class DownloadManager:
             try:
                 response.raise_for_status()
             except Exception as e:
-                os.remove(lock_path)
                 raise APIError(
                     "Error downloading file", http_status=response.status_code
                 ) from e
@@ -234,8 +231,6 @@ class DownloadManager:
             # Moves temp file to output file path
             chmod_and_replace(Path(temp_file.name), file_path)
-        os.remove(lock_path)
         return str(file_path.resolve()), file_size
@@ -323,13 +318,6 @@ class UploadManager:
             client=self._client,
         )
-        if redirect:
-            if file.suffix not in [".jsonl", ".parquet", ".pt"]:
-                raise FileTypeError(
-                    f"Unknown extension of file {file}. "
-                    "Only files with extensions .jsonl, .parquet, and .pt are supported."
-                )
         file_size = os.stat(file.as_posix()).st_size
         with tqdm(
@@ -338,13 +326,14 @@ class UploadManager:
             unit_scale=True,
             desc=f"Uploading file {file.name}",
             disable=bool(DISABLE_TQDM),
-        ):
+        ) as t:
             with file.open("rb") as f:
+                reader_wrapper = CallbackIOWrapper(t.update, f, "read")
                 response, _, _ = requestor.request(
                     options=SeekrFlowRequest(
                         method="PUT",
                         url=url,
-                        files={"files": f, "filename": file.name},
+                        files={"files": reader_wrapper, "filename": file.name},
                         params={"purpose": purpose.value},
                     ),
                 )

{seekrai-0.1.0 → seekrai-0.2.0}/src/seekrai/resources/__init__.py RENAMED Viewed

@@ -1,3 +1,4 @@
+from seekrai.resources.alignment import Alignment, AsyncAlignment
 from seekrai.resources.chat import AsyncChat, Chat
 from seekrai.resources.completions import AsyncCompletions, Completions
 from seekrai.resources.embeddings import AsyncEmbeddings, Embeddings
@@ -8,6 +9,8 @@ from seekrai.resources.models import AsyncModels, Models
 __all__ = [
+    "AsyncAlignment",
+    "Alignment",
     "AsyncCompletions",
     "Completions",
     "AsyncChat",

seekrai-0.2.0/src/seekrai/resources/alignment.py ADDED Viewed

@@ -0,0 +1,175 @@
+from typing import List
+from seekrai.abstract import api_requestor
+from seekrai.seekrflow_response import SeekrFlowResponse
+from seekrai.types import (
+    AlignmentList,
+    AlignmentRequest,
+    AlignmentResponse,
+    SeekrFlowClient,
+    SeekrFlowRequest,
+)
+class Alignment:
+    def __init__(self, client: SeekrFlowClient) -> None:
+        self._client = client
+    def generate(
+        self,
+        instructions: str,
+        files: List[str],
+    ) -> AlignmentResponse:
+        requestor = api_requestor.APIRequestor(
+            client=self._client,
+        )
+        parameter_payload = AlignmentRequest(
+            instructions=instructions,
+            files=files,
+        ).model_dump()
+        response, _, _ = requestor.request(
+            options=SeekrFlowRequest(
+                method="POST",
+                url="flow/alignment/generate",
+                params=parameter_payload,
+            ),
+            stream=False,
+        )
+        assert isinstance(response, SeekrFlowResponse)
+        return AlignmentResponse(**response.data)
+    def list(self) -> AlignmentList:
+        """
+        Lists alignment job history
+        Returns:
+            AlignmentList: Object containing a list of alignment jobs
+        """
+        requestor = api_requestor.APIRequestor(
+            client=self._client,
+        )
+        response, _, _ = requestor.request(
+            options=SeekrFlowRequest(
+                method="GET",
+                url="flow/alignment",
+            ),
+            stream=False,
+        )
+        assert isinstance(response, SeekrFlowResponse)
+        return AlignmentList(**response.data)
+    def retrieve(self, id: str) -> AlignmentResponse:
+        """
+        Retrieves alignment job details
+        Args:
+            id (str): Alignment job ID to retrieve.
+        Returns:
+            AlignmentResponse: Object containing information about alignment job.
+        """
+        requestor = api_requestor.APIRequestor(
+            client=self._client,
+        )
+        response, _, _ = requestor.request(
+            options=SeekrFlowRequest(
+                method="GET",
+                url=f"flow/alignment/{id}",
+            ),
+            stream=False,
+        )
+        assert isinstance(response, SeekrFlowResponse)
+        return AlignmentResponse(**response.data)
+class AsyncAlignment:
+    def __init__(self, client: SeekrFlowClient) -> None:
+        self._client = client
+    async def generate(
+        self,
+        instructions: str,
+        files: List[str],
+    ) -> AlignmentResponse:
+        requestor = api_requestor.APIRequestor(
+            client=self._client,
+        )
+        parameter_payload = AlignmentRequest(
+            instructions=instructions,
+            files=files,
+        ).model_dump()
+        response, _, _ = await requestor.arequest(
+            options=SeekrFlowRequest(
+                method="POST",
+                url="flow/alignment/generate",
+                params=parameter_payload,
+            ),
+            stream=False,
+        )
+        assert isinstance(response, SeekrFlowResponse)
+        return AlignmentResponse(**response.data)
+    async def list(self) -> AlignmentList:
+        """
+        Lists alignment job history
+        Returns:
+            AlignmentList: Object containing a list of alignment jobs
+        """
+        requestor = api_requestor.APIRequestor(
+            client=self._client,
+        )
+        response, _, _ = await requestor.arequest(
+            options=SeekrFlowRequest(
+                method="GET",
+                url="flow/alignment",
+            ),
+            stream=False,
+        )
+        assert isinstance(response, SeekrFlowResponse)
+        return AlignmentList(**response.data)
+    async def retrieve(self, id: str) -> AlignmentResponse:
+        """
+        Retrieves alignment job details
+        Args:
+            id (str): Alignment job ID to retrieve.
+        Returns:
+            AlignmentResponse: Object containing information about alignment job.
+        """
+        requestor = api_requestor.APIRequestor(
+            client=self._client,
+        )
+        response, _, _ = await requestor.arequest(
+            options=SeekrFlowRequest(
+                method="GET",
+                url=f"flow/alignment/{id}",
+            ),
+            stream=False,
+        )
+        assert isinstance(response, SeekrFlowResponse)
+        return AlignmentResponse(**response.data)

{seekrai-0.1.0 → seekrai-0.2.0}/src/seekrai/resources/files.py RENAMED Viewed

@@ -1,6 +1,7 @@
 from __future__ import annotations
 from pathlib import Path
+from typing import Any, Dict
 from seekrai.abstract import api_requestor
 from seekrai.filemanager import DownloadManager, UploadManager
@@ -14,6 +15,10 @@ from seekrai.types import (
     SeekrFlowClient,
     SeekrFlowRequest,
 )
+from seekrai.types.files import (
+    AlignFileMetadataValidationReq,
+    AlignFileMetadataValidationResp,
+)
 from seekrai.utils import normalize_key
@@ -21,11 +26,18 @@ class Files:
     def __init__(self, client: SeekrFlowClient) -> None:
         self._client = client
+    def _get_local_file_metadata(self, file_path: Path) -> Dict[str, Any]:
+        suffix = file_path.suffix.lstrip(".")
+        size_bytes = int(file_path.stat().st_size)
+        return {
+            "suffix": suffix,
+            "size_bytes": size_bytes,
+            "filename": file_path.name,
+        }
     def upload(
         self, file: Path | str, *, purpose: FilePurpose | str = FilePurpose.FineTune
     ) -> FileResponse:
-        upload_manager = UploadManager(self._client)
         if isinstance(file, str):
             file = Path(file)
@@ -34,7 +46,30 @@ class Files:
         assert isinstance(purpose, FilePurpose)
-        return upload_manager.upload("flow/files", file, purpose=purpose, redirect=True)
+        # Do the metadata validation (fail fast before uploading) for Alignment purpose
+        if purpose == FilePurpose.Alignment:
+            file_metadata = self._get_local_file_metadata(file)
+            suffix = file_metadata["suffix"]
+            size = file_metadata["size_bytes"]
+            metadata_validation = self.validate_align_file_metadata(
+                purpose,
+                suffix,
+                size,
+            )
+            if not metadata_validation.is_valid:
+                assert metadata_validation.errors is not None  # To appease linter
+                raise ValueError(
+                    f"Alignment file metadata validation failed: {metadata_validation.errors}"
+                )
+        # Upload the file to s3
+        upload_manager = UploadManager(self._client)
+        file_response = upload_manager.upload(
+            "flow/files", file, purpose=purpose, redirect=True
+        )
+        return file_response
     def list(self) -> FileList:
         requestor = api_requestor.APIRequestor(
@@ -87,7 +122,7 @@ class Files:
             output = Path(output)
         downloaded_filename, file_size = download_manager.download(
-            f"flow/files/{id}/content", output, normalize_key(f"{id}.jsonl")
+            f"flow/files/{id}/content", output, normalize_key(id)
         )
         return FileObject(
@@ -114,6 +149,31 @@ class Files:
         return FileDeleteResponse(**response.data)
+    def validate_align_file_metadata(
+        self,
+        purpose: FilePurpose,
+        suffix: str,
+        size: int,
+    ) -> AlignFileMetadataValidationResp:
+        requestor = api_requestor.APIRequestor(client=self._client)
+        request_body = AlignFileMetadataValidationReq(
+            purpose=purpose,
+            suffix=suffix,
+            size=size,
+        )
+        response, _, _ = requestor.request(
+            options=SeekrFlowRequest(
+                method="POST",
+                url="flow/files/validate_metadata",
+                params=request_body.dict(),
+            ),
+            stream=False,
+        )
+        return AlignFileMetadataValidationResp(**response.data)
 class AsyncFiles:
     def __init__(self, client: SeekrFlowClient) -> None:

{seekrai-0.1.0 → seekrai-0.2.0}/src/seekrai/resources/models.py RENAMED Viewed

@@ -5,6 +5,7 @@ from pathlib import Path
 from typing import Any, List
 from tqdm import tqdm
+from tqdm.utils import CallbackIOWrapper
 from seekrai.abstract import api_requestor
 from seekrai.constants import DISABLE_TQDM
@@ -35,15 +36,16 @@ class Models:
             total=file_size,
             unit="B",
             unit_scale=True,
-            desc=f"Uploading file {file.name}",
+            desc=f"Uploading model file {file.name}",
             disable=bool(DISABLE_TQDM),
-        ):
+        ) as t:
             with file.open("rb") as f:
+                reader_wrapper = CallbackIOWrapper(t.update, f, "read")
                 response, _, _ = requestor.request(
                     options=SeekrFlowRequest(
                         method="PUT",
                         url="flow/pt-models",
-                        files={"files": f, "filename": file.name},
+                        files={"files": reader_wrapper, "filename": file.name},
                         params={"purpose": model_type},
                     ),
                 )

{seekrai-0.1.0 → seekrai-0.2.0}/src/seekrai/types/__init__.py RENAMED Viewed

@@ -1,4 +1,10 @@
 from seekrai.types.abstract import SeekrFlowClient
+from seekrai.types.alignment import (
+    AlignmentJobStatus,
+    AlignmentList,
+    AlignmentRequest,
+    AlignmentResponse,
+)
 from seekrai.types.chat_completions import (
     ChatCompletionChunk,
     ChatCompletionRequest,
@@ -65,4 +71,8 @@ __all__ = [
     "ImageResponse",
     "ModelResponse",
     "ModelList",
+    "AlignmentRequest",
+    "AlignmentResponse",
+    "AlignmentJobStatus",
+    "AlignmentList",
 ]

seekrai-0.2.0/src/seekrai/types/alignment.py ADDED Viewed

@@ -0,0 +1,39 @@
+from datetime import datetime
+from enum import Enum
+from typing import List, Literal, Optional
+from pydantic import Field
+from seekrai.types.abstract import BaseModel
+class AlignmentRequest(BaseModel):
+    instructions: str = Field(
+        default=..., description="Task description/instructions for the alignment task"
+    )
+    files: List[str] = Field(
+        default=..., description="List of file ids to use for alignment"
+    )
+class AlignmentJobStatus(str, Enum):
+    STATUS_PENDING = "pending"
+    STATUS_QUEUED = "queued"
+    STATUS_RUNNING = "running"
+    STATUS_CANCEL_REQUESTED = "cancel_requested"
+    STATUS_CANCELLED = "cancelled"
+    STATUS_FAILED = "failed"
+    STATUS_COMPLETED = "completed"
+class AlignmentResponse(BaseModel):
+    id: Optional[str] = Field(default=..., description="Alignment job ID")
+    created_at: datetime | None = None
+    status: AlignmentJobStatus | None = None
+class AlignmentList(BaseModel):
+    # object type
+    object: Literal["list"] | None = None
+    # list of fine-tune job objects
+    data: List[AlignmentResponse] | None = None

{seekrai-0.1.0 → seekrai-0.2.0}/src/seekrai/types/files.py RENAMED Viewed

@@ -2,7 +2,7 @@ from __future__ import annotations
 from datetime import datetime
 from enum import Enum
-from typing import List, Literal
+from typing import List, Literal, Optional, Union
 from seekrai.types.abstract import BaseModel
 from seekrai.types.common import (
@@ -13,12 +13,42 @@ from seekrai.types.common import (
 class FilePurpose(str, Enum):
     FineTune = "fine-tune"
     PreTrain = "pre-train"
+    Alignment = "alignment"
-class FileType(str, Enum):
+class TrainingFileType(str, Enum):
     jsonl = "jsonl"
     parquet = "parquet"
-    pytorch = "pt"
+    pytorch = "pt"  # TODO - this doesnt belong here
+class AlignmentFileType(str, Enum):
+    HTML = "html"
+    MD = "md"
+    RST = "rst"
+    RTF = "rtf"
+    TXT = "txt"
+    XML = "xml"
+    JSON = "json"
+    JSONL = "jsonl"
+    CSV = "csv"
+    DOC = "doc"
+    DOCX = "docx"
+    PDF = "pdf"
+FileType = Union[TrainingFileType, AlignmentFileType]
+class AlignFileMetadataValidationReq(BaseModel):
+    purpose: str
+    suffix: str
+    size: int
+class AlignFileMetadataValidationResp(BaseModel):
+    is_valid: bool
+    errors: Optional[str] = None
 class FileRequest(BaseModel):
@@ -61,8 +91,7 @@ class FileResponse(BaseModel):
     filename: str | None = None
     # file byte size
     bytes: int | None = None
-    # JSONL/Parquet line count
-    line_count: int | None = None
+    created_by: str | None = None  # TODO - fix this later
 class FileList(BaseModel):