folder-classifier 0.2.3__py3-none-any.whl → 0.3.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: folder-classifier
3
- Version: 0.2.3
3
+ Version: 0.3.2
4
4
  Summary: Deploy folder classifier API to a Ray cluster
5
5
  Author: Crispin Almodovar
6
6
  Author-email:
@@ -0,0 +1,4 @@
1
+ folder_classifier-0.3.2.dist-info/METADATA,sha256=jHP5fAMkxgCHpCPo2q9Rzc2Vm9sU-33X94vHHfrHPMs,392
2
+ folder_classifier-0.3.2.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
3
+ folder_classifier-0.3.2.dist-info/top_level.txt,sha256=AbpHGcgLb-kRsJGnwFEktk7uzpZOCcBY74-YBdrKVGs,1
4
+ folder_classifier-0.3.2.dist-info/RECORD,,
@@ -1 +0,0 @@
1
- from folder_classifier.deploy import build_app
folder_classifier/app.py DELETED
@@ -1,31 +0,0 @@
1
- import logging
2
-
3
- from fastapi import FastAPI
4
- from ray import serve
5
-
6
- from folder_classifier.dto import (ModelConfig, FolderClassificationResponse,
7
- FolderClassificationRequest, Listing, Folder)
8
- from folder_classifier.util import flatten_folder
9
- from folder_classifier import classifier
10
-
11
web_api = FastAPI(title="Folder Classifier API")


@serve.deployment
@serve.ingress(web_api)
class FolderClassifierAPI:
    """Ray Serve deployment that exposes the folder classifier over HTTP.

    The FastAPI application ``web_api`` is attached as the deployment's
    ingress and serves a single ``POST /predict`` route.
    """

    def __init__(self, model_config: ModelConfig):
        """Validate the model configuration and set up logging.

        Args:
            model_config: Application and deployment names of the model.

        Raises:
            ValueError: If ``model_config`` is missing, or its ``app_name``
                or ``deployment`` is empty.
        """
        # Raise explicitly instead of using ``assert``: assertions are
        # removed when Python runs with ``-O``, which would silently skip
        # the validation.
        if not model_config:
            raise ValueError("model_config is required")
        if not (model_config.app_name and model_config.deployment):
            raise ValueError("Invalid ModelConfig values")

        logging.basicConfig(level=logging.INFO)
        self.logger = logging.getLogger(__name__)
        self.logger.info("Initializing model: %s", model_config)
        # NOTE: model_config is only validated and logged here; predict()
        # calls the in-process ``classifier`` module and does not look up a
        # separate deployment handle.
        self.logger.info("Successfully initialized Folder Classifier API")

    @web_api.post("/predict")
    async def predict(self, request: FolderClassificationRequest) -> FolderClassificationResponse:
        """Classify the folder described by ``request``.

        Args:
            request: The payload to classify. A ``Listing`` is used as is;
                anything else is flattened into a ``Listing`` with
                ``flatten_folder``.

        Returns:
            The predicted category and its confidence score.
        """
        # Log before classifying so that a request which makes the
        # classifier raise still shows up in the logs.
        self.logger.info("Received request: %s", request)
        if isinstance(request, Listing):
            listing = request
        else:
            listing = Listing(items=flatten_folder(request))
        category, confidence = classifier.predict(listing)
        return FolderClassificationResponse(category=category, confidence=confidence)
@@ -1,25 +0,0 @@
1
- from typing import Tuple
2
- import torch
3
- from transformers import pipeline
4
- from folder_classifier.dto import Listing
5
-
6
-
7
# Text-classification pipeline, created on the first call to predict().
classifier = None


def predict(listing: Listing) -> Tuple[str, float]:
    """Classify a listing and return ``(label, score)`` of the first prediction.

    The pipeline is built on first use and kept in the module-level
    ``classifier`` variable so later calls reuse it. The listing's items are
    joined with newlines into a single text before being classified.
    """
    global classifier
    if classifier is None:
        pipeline_options = {
            "model": "/mnt/cluster_storage/models/corto-ai/ModernBERT-large-folder-classifier",
            "torch_dtype": torch.bfloat16,
            "device": "cuda",
        }
        classifier = pipeline("text-classification", **pipeline_options)

    first_prediction = classifier("\n".join(listing.items))[0]
    return first_prediction["label"], first_prediction["score"]
24
-
25
-
@@ -1,12 +0,0 @@
1
- from ray.serve import Application
2
-
3
- from folder_classifier.app import FolderClassifierAPI
4
- from folder_classifier.dto import AppConfig
5
-
6
-
7
def build_app(args: AppConfig) -> Application:
    """Build the Ray Serve application for the folder classifier.

    Args:
        args: Application configuration. ``args.model`` must be set and
            carry a non-empty ``app_name`` and ``deployment``.

    Returns:
        ``FolderClassifierAPI`` bound to ``args.model``.

    Raises:
        ValueError: If ``args`` or ``args.model`` is missing, or the model's
            ``app_name`` or ``deployment`` is empty.
    """
    # Raise explicitly instead of using ``assert``: assertions are removed
    # when Python runs with ``-O``, which would let an invalid configuration
    # through.
    if not (args and args.model):
        raise ValueError("AppConfig model is required")
    model = args.model
    if not (model.app_name and model.deployment):
        raise ValueError("Model's app_name and deployment are required")

    return FolderClassifierAPI.bind(model)
folder_classifier/dto.py DELETED
@@ -1,54 +0,0 @@
1
- from __future__ import annotations
2
-
3
- from textwrap import dedent
4
- from typing import List, Union, Literal, Optional
5
-
6
- from pydantic import BaseModel, Field, confloat
7
-
8
-
9
class ModelConfig(BaseModel):
    """Names that identify a model: its application and its deployment."""

    # Both fields are required strings.
    app_name: str
    deployment: str
12
-
13
-
14
class AppConfig(BaseModel):
    """Top-level application configuration wrapping a ``ModelConfig``."""

    # Required; there is no default model.
    model: ModelConfig
16
-
17
-
18
class File(BaseModel):
    """A file entry inside a folder tree."""

    name: str
    # Fixed tag; only the literal value "file" validates.
    type: Literal["file"]
21
-
22
-
23
class Folder(BaseModel):
    """A folder node in a nested folder tree.

    ``items`` holds the folder's direct children, each of which is either a
    ``File`` or another ``Folder``.
    """

    name: str
    # Fixed tag; only the literal value "folder" validates.
    type: Literal["folder"]
    # No explicit discriminator is configured for this union; the distinct
    # Literal ``type`` values of File and Folder are what tell the members
    # apart. The field is Optional, so it may be None; when omitted it
    # defaults to an empty list.
    items: Optional[List[Union[File, Folder]]] = Field(default_factory=list)
    model_config = {
        "json_schema_extra": {
            # Override the OpenAPI example to avoid the default 'string' entry.
            # The example is a structured value rather than a JSON-encoded
            # string so that it is an instance of the schema it illustrates.
            "example": {
                "name": "string",
                "type": "folder",
                "items": [
                    {
                        "name": "string",
                        "type": "file",
                    }
                ],
            }
        }
    }
43
-
44
-
45
class Listing(BaseModel):
    """A flat list of strings describing a folder's contents."""

    # Required list of string entries.
    items: List[str]
47
-
48
-
49
# Folder refers to itself in its ``items`` annotation; rebuild the model now
# that the class is fully defined.
Folder.model_rebuild()
# The request payload type is currently an alias of Listing.
FolderClassificationRequest = Listing


class FolderClassificationResponse(BaseModel):
    """Result of a classification: the predicted category and its confidence."""

    category: Literal["matter", "other"]
    # Required float restricted to the closed interval [0.0, 1.0]. Declared
    # with ``Field`` constraints instead of a ``confloat(...)`` call in the
    # annotation, which pydantic v2 discourages.
    confidence: float = Field(ge=0.0, le=1.0)
folder_classifier/util.py DELETED
@@ -1,21 +0,0 @@
1
- from typing import List
2
-
3
- from folder_classifier.dto import Folder
4
-
5
-
6
def flatten_folder(folder: Folder, parent_path: str = "") -> List[str]:
    """Return the paths of all files found under ``folder``.

    Each path is built by joining folder and file names with '/'. Subfolders
    are traversed recursively. Folders that contain no files contribute no
    entries.

    Args:
        folder: Root of the tree to traverse.
        parent_path: Path prefix of ``folder``; empty for the top-level call.

    Returns:
        File paths in traversal order (depth-first, children in list order).
    """
    current_path = f"{parent_path}/{folder.name}" if parent_path else folder.name

    paths: List[str] = []
    # ``Folder.items`` is declared Optional, so it can be None; treat that
    # the same as an empty folder instead of failing on iteration.
    for item in folder.items or []:
        if item.type == "file":
            paths.append(f"{current_path}/{item.name}")
        else:
            # Recursively flatten subfolders
            paths.extend(flatten_folder(item, current_path))
    return paths
@@ -1,10 +0,0 @@
1
- folder_classifier/__init__.py,sha256=k0YWZyUNe7myJiKeX0OaXtJ30_3EGE-vsZiAUbqa-3E,46
2
- folder_classifier/app.py,sha256=FqoBp_KQ3yIfoHTagdDTVsLbIj8luCn7en533Q870x8,1443
3
- folder_classifier/classifier.py,sha256=YVKXvNAHny167H0Iv0GlRJEtRMAYwGea3cUVeGC4_sI,668
4
- folder_classifier/deploy.py,sha256=UQTbQjR_JX92Xo8L5EbK4nPg_VJmwWDmzMQSjP3-7iQ,412
5
- folder_classifier/dto.py,sha256=Xb1ozZQkfC45cbBoNOZ8xfkillJWFKXKWQc9CZxppXI,1260
6
- folder_classifier/util.py,sha256=t-ma2suHovfNutogJb9jailRbRpIg4qv-zph3dHb2og,692
7
- folder_classifier-0.2.3.dist-info/METADATA,sha256=MZ_BgIBlPrq86na7t9aMXA8EONA05MaHPofJV1P6hZM,392
8
- folder_classifier-0.2.3.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
9
- folder_classifier-0.2.3.dist-info/top_level.txt,sha256=36ugc9pEbNQ-mnzz4Ot2WVjY3t_LzAN6XOCjDFP4p4k,18
10
- folder_classifier-0.2.3.dist-info/RECORD,,
@@ -1 +0,0 @@
1
- folder_classifier