folder-classifier 0.1.1__tar.gz → 0.2.0__tar.gz

This diff shows the changes between publicly available versions of a package that have been released to one of the supported registries. It is provided for informational purposes only and reflects the differences between the package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: folder-classifier
3
- Version: 0.1.1
3
+ Version: 0.2.0
4
4
  Summary: Deploy folder classifier API to a Ray cluster
5
5
  Author: Crispin Almodovar
6
6
  Author-email:
@@ -3,7 +3,10 @@ import logging
3
3
  from fastapi import FastAPI
4
4
  from ray import serve
5
5
 
6
- from folder_classifier.dto import ModelConfig, FolderClassificationResponse, FolderClassificationRequest
6
+ from folder_classifier.dto import (ModelConfig, FolderClassificationResponse,
7
+ FolderClassificationRequest, Listing, Folder)
8
+ from folder_classifier.util import flatten_folder
9
+ from folder_classifier import classifier
7
10
 
8
11
  web_api = FastAPI(title=f"Folder Classifier API")
9
12
 
@@ -22,6 +25,7 @@ class FolderClassifierAPI:
22
25
 
23
26
  @web_api.post("/predict")
24
27
  async def predict(self, request: FolderClassificationRequest) -> FolderClassificationResponse:
25
- result = ("matter", 0.9) #await self.model_handle.remote(request)
28
+ listing = request if isinstance(request, Listing) else Listing(items=flatten_folder(request))
29
+ category, confidence = classifier.predict(listing)
26
30
  self.logger.info(f"Received request: {request}")
27
- return FolderClassificationResponse(category=result[0], confidence=result[1])
31
+ return FolderClassificationResponse(category=category, confidence=confidence)
@@ -0,0 +1,34 @@
1
+ from typing import Tuple
2
+
3
+ import numpy as np
4
+ import torch
5
+ from transformers import pipeline
6
+
7
+ from folder_classifier.dto import Listing
8
+
9
+ classifier = pipeline(
10
+ "zero-shot-classification",
11
+ model="MoritzLaurer/ModernBERT-large-zeroshot-v2.0",
12
+ torch_dtype=torch.bfloat16,
13
+ device="cuda"
14
+ )
15
+
16
+ candidate_labels = ["legal_matter", "other"]
17
+
18
+ def predict(listing: Listing) -> Tuple[str, float]:
19
+ text = "\n".join(listing.items)
20
+ hypothesis_template = "This list of files is about {}"
21
+ prediction = classifier(
22
+ text,
23
+ candidate_labels,
24
+ hypothesis_template=hypothesis_template,
25
+ multi_label=False,
26
+ )
27
+ scores = np.array(prediction["scores"], dtype=float)
28
+ highest_ix = np.argmax(scores)
29
+ predicted_label = prediction["labels"][highest_ix]
30
+ confidence = float(scores[highest_ix])
31
+ prediction = "matter" if predicted_label == "legal_matter" else "other"
32
+ return prediction, confidence
33
+
34
+
@@ -25,25 +25,34 @@ class Folder(BaseModel):
25
25
  type: Literal["folder"]
26
26
  # Discriminated union: 'type' field is used to select between File and Folder
27
27
  items: List[Union[File, Folder]] = Field(default_factory=list)
28
- # model_config = {
29
- # "json_schema_extra": {
30
- # # Override the OpenAPI example to avoid the default 'string' entry
31
- # "example": dedent("""{
32
- # "name": "string",
33
- # "type": "folder",
34
- # "items": [
35
- # {
36
- # "name": "string",
37
- # "type": "file"
38
- # }
39
- # ]
40
- # }""")
41
- # }
42
- # }
28
+ model_config = {
29
+ "json_schema_extra": {
30
+ # Override the OpenAPI example to avoid the default 'string' entry
31
+ "example": dedent("""{
32
+ "name": "string",
33
+ "type": "folder",
34
+ "items": [
35
+ {
36
+ "name": "string",
37
+ "type": "file"
38
+ }
39
+ ]
40
+ }""")
41
+ }
42
+ }
43
+
44
+
45
+ class Listing(BaseModel):
46
+ items: List[str]
43
47
 
44
48
 
45
49
  Folder.model_rebuild()
46
- FolderClassificationRequest = Folder
50
+ FolderClassificationRequest = Union[Folder, Listing]
51
+
52
+
53
+ class ItemsRequest(BaseModel):
54
+ items: List[Union[File, Folder]]
55
+
47
56
 
48
57
  class FolderClassificationResponse(BaseModel):
49
58
  category: Literal["matter", "other"]
@@ -0,0 +1,21 @@
1
+ from typing import List
2
+
3
+ from folder_classifier.dto import Folder
4
+
5
+
6
+ def flatten_folder(folder: Folder, parent_path: str = "") -> List[str]:
7
+ """
8
+ Traverses a Folder and returns a list of file paths.
9
+ Each path is constructed by joining folder and file names with '/'.
10
+ """
11
+ paths: List[str] = []
12
+ # Build the path for the current folder
13
+ current_path = f"{parent_path}/{folder.name}" if parent_path else folder.name
14
+
15
+ for item in folder.items:
16
+ if item.type == "file":
17
+ paths.append(f"{current_path}/{item.name}")
18
+ else:
19
+ # Recursively flatten subfolders
20
+ paths.extend(flatten_folder(item, current_path))
21
+ return paths
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: folder-classifier
3
- Version: 0.1.1
3
+ Version: 0.2.0
4
4
  Summary: Deploy folder classifier API to a Ray cluster
5
5
  Author: Crispin Almodovar
6
6
  Author-email:
@@ -3,8 +3,10 @@ pyproject.toml
3
3
  setup.cfg
4
4
  folder_classifier/__init__.py
5
5
  folder_classifier/app.py
6
+ folder_classifier/classifier.py
6
7
  folder_classifier/deploy.py
7
8
  folder_classifier/dto.py
9
+ folder_classifier/util.py
8
10
  folder_classifier.egg-info/PKG-INFO
9
11
  folder_classifier.egg-info/SOURCES.txt
10
12
  folder_classifier.egg-info/dependency_links.txt
@@ -1,6 +1,6 @@
1
1
  [metadata]
2
2
  name = folder-classifier
3
- version = 0.1.1
3
+ version = 0.2.0
4
4
  author = Crispin Almodovar
5
5
  author_email =
6
6
  description = Deploy folder classifier API to a Ray cluster