folder-classifier 0.2.1__py3-none-any.whl → 0.2.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,37 +1,25 @@
1
1
  from typing import Tuple
2
-
3
- import numpy as np
4
2
  import torch
5
3
  from transformers import pipeline
6
-
7
4
  from folder_classifier.dto import Listing
8
5
 
6
+
9
7
  classifier = None
10
8
 
11
- candidate_labels = ["legal_matter", "other"]
12
9
 
13
10
  def predict(listing: Listing) -> Tuple[str, float]:
14
11
  global classifier
15
12
  if classifier is None:
16
13
  classifier = pipeline(
17
- "zero-shot-classification",
18
- model="MoritzLaurer/ModernBERT-large-zeroshot-v2.0",
14
+ "text-classification",
15
+ model="/mnt/cluster_storage/models/corto-ai/ModernBERT-large-folder-classifier",
19
16
  torch_dtype=torch.bfloat16,
20
17
  device="cuda"
21
18
  )
22
19
  text = "\n".join(listing.items)
23
- hypothesis_template = "This list of files is about {}"
24
- prediction = classifier(
25
- text,
26
- candidate_labels,
27
- hypothesis_template=hypothesis_template,
28
- multi_label=False,
29
- )
30
- scores = np.array(prediction["scores"], dtype=float)
31
- highest_ix = np.argmax(scores)
32
- predicted_label = prediction["labels"][highest_ix]
33
- confidence = float(scores[highest_ix])
34
- prediction = "matter" if predicted_label == "legal_matter" else "other"
35
- return prediction, confidence
20
+ prediction = classifier(text)
21
+ predicted_label = prediction[0]["label"]
22
+ confidence = prediction[0]["score"]
23
+ return predicted_label, confidence
36
24
 
37
25
 
folder_classifier/dto.py CHANGED
@@ -1,7 +1,7 @@
1
1
  from __future__ import annotations
2
2
 
3
3
  from textwrap import dedent
4
- from typing import List, Union, Literal
4
+ from typing import List, Union, Literal, Optional
5
5
 
6
6
  from pydantic import BaseModel, Field, confloat
7
7
 
@@ -24,7 +24,7 @@ class Folder(BaseModel):
24
24
  name: str
25
25
  type: Literal["folder"]
26
26
  # Discriminated union: 'type' field is used to select between File and Folder
27
- items: List[Union[File, Folder]] = Field(default_factory=list)
27
+ items: Optional[List[Union[File, Folder]]] = Field(default_factory=list)
28
28
  model_config = {
29
29
  "json_schema_extra": {
30
30
  # Override the OpenAPI example to avoid the default 'string' entry
@@ -49,11 +49,6 @@ class Listing(BaseModel):
49
49
  Folder.model_rebuild()
50
50
  FolderClassificationRequest = Union[Folder, Listing]
51
51
 
52
-
53
- class ItemsRequest(BaseModel):
54
- items: List[Union[File, Folder]]
55
-
56
-
57
52
  class FolderClassificationResponse(BaseModel):
58
53
  category: Literal["matter", "other"]
59
54
  confidence: confloat(ge=0.0, le=1.0)
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: folder-classifier
3
- Version: 0.2.1
3
+ Version: 0.2.2
4
4
  Summary: Deploy folder classifier API to a Ray cluster
5
5
  Author: Crispin Almodovar
6
6
  Author-email:
@@ -0,0 +1,10 @@
1
+ folder_classifier/__init__.py,sha256=k0YWZyUNe7myJiKeX0OaXtJ30_3EGE-vsZiAUbqa-3E,46
2
+ folder_classifier/app.py,sha256=FqoBp_KQ3yIfoHTagdDTVsLbIj8luCn7en533Q870x8,1443
3
+ folder_classifier/classifier.py,sha256=YVKXvNAHny167H0Iv0GlRJEtRMAYwGea3cUVeGC4_sI,668
4
+ folder_classifier/deploy.py,sha256=UQTbQjR_JX92Xo8L5EbK4nPg_VJmwWDmzMQSjP3-7iQ,412
5
+ folder_classifier/dto.py,sha256=8ZhXO8q4OVIXxepOAtQjsYnoVL6yfqQc8WkY9bXEN5Q,1275
6
+ folder_classifier/util.py,sha256=t-ma2suHovfNutogJb9jailRbRpIg4qv-zph3dHb2og,692
7
+ folder_classifier-0.2.2.dist-info/METADATA,sha256=RHGa9U46FIqBKaoKDMlBw1L5yC4K-f-MZDwFd4qp5l0,392
8
+ folder_classifier-0.2.2.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
9
+ folder_classifier-0.2.2.dist-info/top_level.txt,sha256=36ugc9pEbNQ-mnzz4Ot2WVjY3t_LzAN6XOCjDFP4p4k,18
10
+ folder_classifier-0.2.2.dist-info/RECORD,,
@@ -1,10 +0,0 @@
1
- folder_classifier/__init__.py,sha256=k0YWZyUNe7myJiKeX0OaXtJ30_3EGE-vsZiAUbqa-3E,46
2
- folder_classifier/app.py,sha256=FqoBp_KQ3yIfoHTagdDTVsLbIj8luCn7en533Q870x8,1443
3
- folder_classifier/classifier.py,sha256=vt7LxIKpzGcB-74epsQKrEg6CfPuJnoRsQ-qIkO7r8o,1062
4
- folder_classifier/deploy.py,sha256=UQTbQjR_JX92Xo8L5EbK4nPg_VJmwWDmzMQSjP3-7iQ,412
5
- folder_classifier/dto.py,sha256=WFWkYUVn5rrAowObBlMyacbgzHB30km0Y4vsAPjOULc,1326
6
- folder_classifier/util.py,sha256=t-ma2suHovfNutogJb9jailRbRpIg4qv-zph3dHb2og,692
7
- folder_classifier-0.2.1.dist-info/METADATA,sha256=8vaTYk8ZVnqcmZaNAs3AiC9iWXV4XopeuFlquj5CiBg,392
8
- folder_classifier-0.2.1.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
9
- folder_classifier-0.2.1.dist-info/top_level.txt,sha256=36ugc9pEbNQ-mnzz4Ot2WVjY3t_LzAN6XOCjDFP4p4k,18
10
- folder_classifier-0.2.1.dist-info/RECORD,,