PyPI - folder-classifier - Versions diffs - 0.2.0__tar.gz → 0.2.2__tar.gz - Mend

folder-classifier 0.2.0.tar.gz → 0.2.2.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (15) hide show

{folder_classifier-0.2.0 → folder_classifier-0.2.2}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: folder-classifier
-Version: 0.2.0
+Version: 0.2.2
 Summary: Deploy folder classifier API to a Ray cluster
 Author: Crispin Almodovar
 Author-email:

folder_classifier-0.2.2/folder_classifier/classifier.py ADDED Viewed

@@ -0,0 +1,25 @@
+from typing import Tuple
+import torch
+from transformers import pipeline
+from folder_classifier.dto import Listing
+classifier = None
+def predict(listing: Listing) -> Tuple[str, float]:
+    global classifier
+    if classifier is None:
+        classifier = pipeline(
+            "text-classification",
+            model="/mnt/cluster_storage/models/corto-ai/ModernBERT-large-folder-classifier",
+            torch_dtype=torch.bfloat16,
+            device="cuda"
+        )
+    text = "\n".join(listing.items)
+    prediction = classifier(text)
+    predicted_label = prediction[0]["label"]
+    confidence = prediction[0]["score"]
+    return predicted_label, confidence

{folder_classifier-0.2.0 → folder_classifier-0.2.2}/folder_classifier/dto.py RENAMED Viewed

@@ -1,7 +1,7 @@
 from __future__ import annotations
 from textwrap import dedent
-from typing import List, Union, Literal
+from typing import List, Union, Literal, Optional
 from pydantic import BaseModel, Field, confloat
@@ -24,7 +24,7 @@ class Folder(BaseModel):
     name: str
     type: Literal["folder"]
     # Discriminated union: 'type' field is used to select between File and Folder
-    items: List[Union[File, Folder]] = Field(default_factory=list)
+    items: Optional[List[Union[File, Folder]]] = Field(default_factory=list)
     model_config = {
         "json_schema_extra": {
             # Override the OpenAPI example to avoid the default 'string' entry
@@ -49,11 +49,6 @@ class Listing(BaseModel):
 Folder.model_rebuild()
 FolderClassificationRequest = Union[Folder, Listing]
-class ItemsRequest(BaseModel):
-    items: List[Union[File, Folder]]
 class FolderClassificationResponse(BaseModel):
     category: Literal["matter", "other"]
     confidence: confloat(ge=0.0, le=1.0)

{folder_classifier-0.2.0 → folder_classifier-0.2.2}/folder_classifier.egg-info/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: folder-classifier
-Version: 0.2.0
+Version: 0.2.2
 Summary: Deploy folder classifier API to a Ray cluster
 Author: Crispin Almodovar
 Author-email:

{folder_classifier-0.2.0 → folder_classifier-0.2.2}/setup.cfg RENAMED Viewed

@@ -1,6 +1,6 @@
 [metadata]
 name = folder-classifier
-version = 0.2.0
+version = 0.2.2
 author = Crispin Almodovar
 author_email =
 description = Deploy folder classifier API to a Ray cluster

folder_classifier-0.2.0/folder_classifier/classifier.py DELETED Viewed

@@ -1,34 +0,0 @@
-from typing import Tuple
-import numpy as np
-import torch
-from transformers import pipeline
-from folder_classifier.dto import Listing
-classifier = pipeline(
-    "zero-shot-classification",
-    model="MoritzLaurer/ModernBERT-large-zeroshot-v2.0",
-    torch_dtype=torch.bfloat16,
-    device="cuda"
-)
-candidate_labels = ["legal_matter", "other"]
-def predict(listing: Listing) -> Tuple[str, float]:
-    text = "\n".join(listing.items)
-    hypothesis_template = "This list of files is about {}"
-    prediction = classifier(
-        text,
-        candidate_labels,
-        hypothesis_template=hypothesis_template,
-        multi_label=False,
-    )
-    scores = np.array(prediction["scores"], dtype=float)
-    highest_ix = np.argmax(scores)
-    predicted_label = prediction["labels"][highest_ix]
-    confidence = float(scores[highest_ix])
-    prediction = "matter" if predicted_label == "legal_matter" else "other"
-    return prediction, confidence