PyPI - folder-classifier - Versions diffs - 0.1.1__tar.gz → 0.2.0__tar.gz - Mend

folder-classifier 0.1.1__tar.gz → 0.2.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (14) hide show

{folder_classifier-0.1.1 → folder_classifier-0.2.0}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: folder-classifier
-Version: 0.1.1
+Version: 0.2.0
 Summary: Deploy folder classifier API to a Ray cluster
 Author: Crispin Almodovar
 Author-email:

{folder_classifier-0.1.1 → folder_classifier-0.2.0}/folder_classifier/app.py RENAMED Viewed

@@ -3,7 +3,10 @@ import logging
 from fastapi import FastAPI
 from ray import serve
-from folder_classifier.dto import ModelConfig, FolderClassificationResponse, FolderClassificationRequest
+from folder_classifier.dto import (ModelConfig, FolderClassificationResponse,
+                                   FolderClassificationRequest, Listing, Folder)
+from folder_classifier.util import flatten_folder
+from folder_classifier import classifier
 web_api = FastAPI(title=f"Folder Classifier API")
@@ -22,6 +25,7 @@ class FolderClassifierAPI:
     @web_api.post("/predict")
     async def predict(self, request: FolderClassificationRequest) -> FolderClassificationResponse:
-        result = ("matter", 0.9)  #await self.model_handle.remote(request)
+        listing = request if isinstance(request, Listing) else Listing(items=flatten_folder(request))
+        category, confidence = classifier.predict(listing)
         self.logger.info(f"Received request: {request}")
-        return FolderClassificationResponse(category=result[0], confidence=result[1])
+        return FolderClassificationResponse(category=category, confidence=confidence)

folder_classifier-0.2.0/folder_classifier/classifier.py ADDED Viewed

@@ -0,0 +1,34 @@
+from typing import Tuple
+import numpy as np
+import torch
+from transformers import pipeline
+from folder_classifier.dto import Listing
+classifier = pipeline(
+    "zero-shot-classification",
+    model="MoritzLaurer/ModernBERT-large-zeroshot-v2.0",
+    torch_dtype=torch.bfloat16,
+    device="cuda"
+)
+candidate_labels = ["legal_matter", "other"]
+def predict(listing: Listing) -> Tuple[str, float]:
+    text = "\n".join(listing.items)
+    hypothesis_template = "This list of files is about {}"
+    prediction = classifier(
+        text,
+        candidate_labels,
+        hypothesis_template=hypothesis_template,
+        multi_label=False,
+    )
+    scores = np.array(prediction["scores"], dtype=float)
+    highest_ix = np.argmax(scores)
+    predicted_label = prediction["labels"][highest_ix]
+    confidence = float(scores[highest_ix])
+    prediction = "matter" if predicted_label == "legal_matter" else "other"
+    return prediction, confidence

{folder_classifier-0.1.1 → folder_classifier-0.2.0}/folder_classifier/dto.py RENAMED Viewed

@@ -25,25 +25,34 @@ class Folder(BaseModel):
     type: Literal["folder"]
     # Discriminated union: 'type' field is used to select between File and Folder
     items: List[Union[File, Folder]] = Field(default_factory=list)
-    # model_config = {
-    #     "json_schema_extra": {
-    #         # Override the OpenAPI example to avoid the default 'string' entry
-    #         "example": dedent("""{
-    #             "name": "string",
-    #             "type": "folder",
-    #             "items": [
-    #                 {
-    #                   "name": "string",
-    #                   "type": "file"
-    #                 }
-    #               ]
-    #             }""")
-    #     }
-    # }
+    model_config = {
+        "json_schema_extra": {
+            # Override the OpenAPI example to avoid the default 'string' entry
+            "example": dedent("""{
+                "name": "string",
+                "type": "folder",
+                "items": [
+                    {
+                      "name": "string",
+                      "type": "file"
+                    }
+                  ]
+                }""")
+        }
+    }
+class Listing(BaseModel):
+    items: List[str]
 Folder.model_rebuild()
-FolderClassificationRequest = Folder
+FolderClassificationRequest = Union[Folder, Listing]
+class ItemsRequest(BaseModel):
+    items: List[Union[File, Folder]]
 class FolderClassificationResponse(BaseModel):
     category: Literal["matter", "other"]

folder_classifier-0.2.0/folder_classifier/util.py ADDED Viewed

@@ -0,0 +1,21 @@
+from typing import List
+from folder_classifier.dto import Folder
+def flatten_folder(folder: Folder, parent_path: str = "") -> List[str]:
+    """
+    Traverses a Folder and returns a list of file paths.
+    Each path is constructed by joining folder and file names with '/'.
+    """
+    paths: List[str] = []
+    # Build the path for the current folder
+    current_path = f"{parent_path}/{folder.name}" if parent_path else folder.name
+    for item in folder.items:
+        if item.type == "file":
+            paths.append(f"{current_path}/{item.name}")
+        else:
+            # Recursively flatten subfolders
+            paths.extend(flatten_folder(item, current_path))
+    return paths

{folder_classifier-0.1.1 → folder_classifier-0.2.0}/folder_classifier.egg-info/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: folder-classifier
-Version: 0.1.1
+Version: 0.2.0
 Summary: Deploy folder classifier API to a Ray cluster
 Author: Crispin Almodovar
 Author-email:

{folder_classifier-0.1.1 → folder_classifier-0.2.0}/folder_classifier.egg-info/SOURCES.txt RENAMED Viewed

@@ -3,8 +3,10 @@ pyproject.toml
 setup.cfg
 folder_classifier/__init__.py
 folder_classifier/app.py
+folder_classifier/classifier.py
 folder_classifier/deploy.py
 folder_classifier/dto.py
+folder_classifier/util.py
 folder_classifier.egg-info/PKG-INFO
 folder_classifier.egg-info/SOURCES.txt
 folder_classifier.egg-info/dependency_links.txt

{folder_classifier-0.1.1 → folder_classifier-0.2.0}/setup.cfg RENAMED Viewed

@@ -1,6 +1,6 @@
 [metadata]
 name = folder-classifier
-version = 0.1.1
+version = 0.2.0
 author = Crispin Almodovar
 author_email =
 description = Deploy folder classifier API to a Ray cluster

{folder_classifier-0.1.1 → folder_classifier-0.2.0}/README.md RENAMED Viewed

File without changes

{folder_classifier-0.1.1 → folder_classifier-0.2.0}/folder_classifier/__init__.py RENAMED Viewed

File without changes

{folder_classifier-0.1.1 → folder_classifier-0.2.0}/folder_classifier/deploy.py RENAMED Viewed

File without changes

{folder_classifier-0.1.1 → folder_classifier-0.2.0}/folder_classifier.egg-info/dependency_links.txt RENAMED Viewed

File without changes

{folder_classifier-0.1.1 → folder_classifier-0.2.0}/folder_classifier.egg-info/top_level.txt RENAMED Viewed

File without changes

{folder_classifier-0.1.1 → folder_classifier-0.2.0}/pyproject.toml RENAMED Viewed

File without changes

folder-classifier 0.1.1__tar.gz → 0.2.0__tar.gz

folder-classifier 0.1.1__tar.gz → 0.2.0__tar.gz