folder-classifier 0.3.8__tar.gz → 0.4.0__tar.gz

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: folder-classifier
3
- Version: 0.3.8
3
+ Version: 0.4.0
4
4
  Summary: Deploy folder classifier API to a Ray cluster
5
5
  Author: Crispin Almodovar
6
6
  Author-email:
@@ -16,11 +16,13 @@ class FolderClassifierAPI:
16
16
  assert model_config.app_name and model_config.deployment, "Invalid ModelConfig values"
17
17
  logging.basicConfig(level=logging.INFO)
18
18
  self.logger = logging.getLogger(__name__)
19
- self.classifier = FolderClassifier(app_name=model_config.app_name, deployment=model_config.deployment, model=model_config.model)
19
+ self.classifier = FolderClassifier(app_name=model_config.app_name, deployment=model_config.deployment, model=model_config.model,
20
+ fallback_config = model_config.fallback)
20
21
  self.logger.info(f"Successfully initialized Folder Classifier API using config: {model_config}")
21
22
 
22
23
  @web_api.post("/predict")
23
24
  async def predict(self, request: FolderClassificationRequest) -> FolderClassificationResponse:
24
- self.logger.info(f"Received request: {request}")
25
+ self.logger.info(f"Received new request")
25
26
  category, reasoning = await self.classifier.predict(request)
27
+ self.logger.info(f"Request with items: {request.items} classified as '{category}' with reasoning: '{reasoning}'")
26
28
  return FolderClassificationResponse(category=category, reasoning=reasoning)
@@ -1,14 +1,20 @@
1
1
  import json
2
2
  import logging
3
- from typing import Tuple, Dict, Any
3
+ from typing import Tuple, Dict, Any, Optional
4
+ import os
4
5
 
6
+ from openai import AsyncOpenAI
5
7
  from ray import serve
6
8
 
7
- from folder_classifier.dto import FolderClassificationRequest, FolderClassification
9
+ from folder_classifier.dto import FolderClassificationRequest, FolderClassification, FallbackConfig
8
10
  from folder_classifier.util import build_folder, render_tree
9
11
 
10
- SYSTEM_PROMPT = r"""
11
- You are a strict text classifier. Provide a single JSON object with exactly two keys: "category" and "reasoning".
12
+
13
+
14
+
15
+ SYSTEM_PROMPT = """
16
+ You are an expert paralegal. Using only the evidence provided, decide if a root folder and its contents represent a single legal matter for a client.
17
+ Follow only the decision rules included in the user message. Respond with exactly one minified JSON object with exactly two keys: "category" and "reasoning".
12
18
  - "category": either "matter" or "other" (lowercase).
13
19
  - "reasoning": 1–2 short explanation referencing the key rule(s) that decided it.
14
20
  No markdown (no backticks or code blocks) or any extra text outside the JSON. No chain-of-thought explanations or extra keys. If uncertain, choose "other".
@@ -80,17 +86,24 @@ FOLDER_CLASSIFICATION_SCHEMA = FolderClassification.model_json_schema()
80
86
 
81
87
 
82
88
  class FolderClassifier:
83
- def __init__(self, app_name: str, deployment: str, model: str):
89
+ def __init__(self, app_name: str, deployment: str, model: str, fallback_config: Optional[FallbackConfig] = None):
84
90
  self.logger = logging.getLogger(__name__)
85
91
  self.model_handle = serve.get_deployment_handle(app_name=app_name, deployment_name=deployment)
86
92
  self.model = model
87
- self.logger.info(f"Successfully initialized Folder Classifier with remote Ray model: {self.model}")
93
+ self.fallback_config = fallback_config
94
+ self.openai_client = AsyncOpenAI(base_url=self.fallback_config.openai_base_url, api_key=self.fallback_config.openai_api_key) \
95
+ if self.fallback_config else None
96
+
97
+ msg = f"Successfully initialized Folder Classifier with remote Ray model: {self.model}"
98
+ if self.fallback_config:
99
+ msg += f" and fallback - URL: {self.fallback_config.openai_base_url}; model: {self.fallback_config.model}"
100
+ self.logger.info(msg)
88
101
 
89
102
  async def predict(self, request: FolderClassificationRequest) -> Tuple[str, str]:
90
103
  content = ""
91
104
  try:
92
105
  chat_completion_request = self._to_chat_completion_request(request)
93
- response = await self.model_handle.create_chat_completion_internal.remote(chat_completion_request)
106
+ response = await self.run_chat_completion(chat_completion_request)
94
107
  response_dict = json.loads(response.body)
95
108
  content = response_dict["choices"][0]["message"]["content"]
96
109
  result = FolderClassification.model_validate_json(content)
@@ -102,6 +115,17 @@ class FolderClassifier:
102
115
  result = FolderClassification(category="other", reasoning="NA")
103
116
  return result.category, result.reasoning
104
117
 
118
+ async def run_chat_completion(self, chat_completion_request: dict[str, Any]) -> Any:
119
+ response = None
120
+ try:
121
+ response = await self.model_handle.create_chat_completion_internal.remote(chat_completion_request)
122
+ except Exception as ex:
123
+ self.logger.warning(f"Failed to invoke primary model {chat_completion_request['model']}. {ex}")
124
+ if self.fallback_config:
125
+ self.logger.info(f"Invoking fallback OpenAI model: {self.fallback_config.model}")
126
+ response = await self.openai_client.chat.completions.create(**chat_completion_request)
127
+ return response
128
+
105
129
  def _to_chat_completion_request(self, request: FolderClassificationRequest) -> Dict[str, Any]:
106
130
  input_paths = request.items
107
131
  folder = build_folder(input_paths)
@@ -116,7 +140,7 @@ class FolderClassifier:
116
140
  replace("{folder_tree}", folder_tree)}
117
141
  ],
118
142
  "max_tokens": 1024,
119
- "temperature": 0.7,
143
+ "temperature": 0.2,
120
144
  "top_p": 0.8,
121
145
  "response_format": {
122
146
  "type": "json_schema",
@@ -2,11 +2,16 @@ from ray.serve import Application
2
2
 
3
3
  from folder_classifier.app import FolderClassifierAPI
4
4
  from folder_classifier.dto import AppConfig
5
+ from folder_classifier.util import get_openapi_key
5
6
 
6
7
 
7
8
  def build_app(args: AppConfig) -> Application:
8
9
  assert args and args.model, "AppConfig model is required"
9
10
  assert args.model.app_name and args.model.deployment, "Model's app_name and deployment are required"
10
11
 
12
+ if args.model.fallback and args.model.fallback.openai_base_url and args.model.fallback.model:
13
+ if not args.model.fallback.openai_api_key:
14
+ args.model.fallback.openai_api_key = get_openapi_key()
15
+
11
16
  app = FolderClassifierAPI.bind(args.model)
12
17
  return app
@@ -2,10 +2,17 @@ from typing import List, Union, Literal, Optional
2
2
  from pydantic import BaseModel, Field, ConfigDict
3
3
 
4
4
 
5
+ class FallbackConfig(BaseModel):
6
+ openai_base_url: str
7
+ openai_api_key: Optional[str] = None
8
+ model: str
9
+
10
+
5
11
  class ModelConfig(BaseModel):
6
12
  app_name: str
7
13
  deployment: str
8
14
  model: str
15
+ fallback: Optional[FallbackConfig] = None
9
16
 
10
17
 
11
18
  class AppConfig(BaseModel):
@@ -19,7 +26,7 @@ class File(BaseModel):
19
26
 
20
27
  class Folder(BaseModel):
21
28
  name: str
22
- type: Literal["folder"]
29
+ type: Literal["root_folder", "sub_folder"]
23
30
  items: List[Union[File, 'Folder']] = Field(default_factory=list)
24
31
 
25
32
 
@@ -0,0 +1,103 @@
1
+ import os
2
+ from pathlib import PurePosixPath
3
+ from typing import List, Union
4
+
5
+ from folder_classifier.dto import Folder, File
6
+ import boto3
7
+
8
+
9
+ OPENAI_MODEL = os.getenv("OPENAI_MODEL", "gemini-2.0-flash")
10
+ OPENAI_BASE_URL = os.getenv("OPENAI_BASE_URL", "https://openaiproxy.dev.cortoaws.com/v1")
11
+
12
+ PARAM_STORE_OPENAI_API_KEY = os.getenv("PARAM_STORE_OPENAI_API_KEY", "/AiService/OpenAiSettings/ApiKey")
13
+ AWS_REGION_NAME = os.getenv("AWS_REGION_NAME", "us-west-2")
14
+
15
+
16
+ def get_openapi_key() -> str:
17
+ boto_session = boto3.Session(region_name=AWS_REGION_NAME)
18
+ ssm_client = boto_session.client("ssm")
19
+ response = ssm_client.get_parameter(Name=PARAM_STORE_OPENAI_API_KEY, WithDecryption=True)
20
+ return response["Parameter"]["Value"]
21
+
22
+
23
+ def build_folder(paths: List[str]) -> Folder:
24
+ """
25
+ Create a Folder tree from a list of file paths.
26
+ Assumptions:
27
+ - The file paths are delimited by "/"
28
+ - There are no '.' and '..' entries in the paths
29
+ - The paths are case-insensitive (Windows paths) -> 'ABC' and 'abc' resolve to the same item
30
+ """
31
+ if not paths:
32
+ raise ValueError("No paths provided")
33
+
34
+ # Build a LOWER-CASED directory-prefix set so folder/file disambiguation is case-insensitive.
35
+ prefix_set_lower = set()
36
+ for p in paths:
37
+ parts = p.split('/')
38
+ for i in range(1, len(parts)):
39
+ prefix_set_lower.add('/'.join(parts[:i]).lower())
40
+
41
+ # Sort by depth so parents are created before children
42
+ sorted_paths = sorted(paths, key=lambda x: x.count('/'))
43
+
44
+ # Create the root folder (preserve first-seen casing)
45
+ root_name = sorted_paths[0].split('/')[0]
46
+ root = Folder(name=root_name, type="root_folder", items=[])
47
+
48
+ # Build the tree
49
+ for p in sorted_paths:
50
+ parts = p.split('/')
51
+ current = root
52
+
53
+ for idx, part in enumerate(parts[1:], start=1):
54
+ part_lower = part.lower()
55
+ full_path_lower = '/'.join(parts[:idx + 1]).lower()
56
+ is_last = idx == len(parts) - 1
57
+
58
+ # Case-insensitive lookup of existing child
59
+ existing = next((item for item in current.items if item.name.lower() == part_lower), None)
60
+ if existing:
61
+ if isinstance(existing, Folder):
62
+ current = existing
63
+ continue
64
+
65
+ # Determine type for new item
66
+ if is_last and full_path_lower not in prefix_set_lower:
67
+ if part.strip() in (".", ".."):
68
+ # These won't appear in the paths, ignore if they do.
69
+ continue
70
+ has_ext = bool(PurePosixPath(part).suffix)
71
+ is_dotfile = part.startswith('.') and len(part) > 1
72
+ is_file = has_ext or is_dotfile
73
+ new_item = File(name=part, type="file") if is_file else Folder(name=part, type="sub_folder", items=[])
74
+ else:
75
+ new_item = Folder(name=part, type="sub_folder", items=[])
76
+
77
+ current.items.append(new_item)
78
+ if isinstance(new_item, Folder):
79
+ current = new_item
80
+
81
+ return root
82
+
83
+
84
+ def render_tree(folder: Folder) -> str:
85
+ """
86
+ Render Folder tree using ASCII tree characters (├──, └──, │).
87
+ """
88
+ lines: List[str] = []
89
+
90
+ def recurse(node: Union[Folder, File], prefix: str, is_last: bool):
91
+ connector = "└── " if is_last else "├── "
92
+ lines.append(f"{prefix}{connector}{node.name}")
93
+ if isinstance(node, Folder):
94
+ child_prefix = prefix + (" " if is_last else "│ ")
95
+ for idx, child in enumerate(node.items):
96
+ recurse(child, child_prefix, idx == len(node.items) - 1)
97
+
98
+ # root
99
+ lines.append(folder.name)
100
+ for idx, child in enumerate(folder.items):
101
+ recurse(child, "", idx == len(folder.items) - 1)
102
+
103
+ return "\n".join(lines)
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: folder-classifier
3
- Version: 0.3.8
3
+ Version: 0.4.0
4
4
  Summary: Deploy folder classifier API to a Ray cluster
5
5
  Author: Crispin Almodovar
6
6
  Author-email:
@@ -1,6 +1,6 @@
1
1
  [metadata]
2
2
  name = folder-classifier
3
- version = 0.3.8
3
+ version = 0.4.0
4
4
  author = Crispin Almodovar
5
5
  author_email =
6
6
  description = Deploy folder classifier API to a Ray cluster
@@ -1,95 +0,0 @@
1
- from typing import List, Union
2
-
3
- from folder_classifier.dto import Folder, File
4
-
5
-
6
- def build_folder(paths: List[str]) -> Folder:
7
- """
8
- Create a Folder tree from a list of file paths;
9
- The file paths are delimited by "/" - leaf segments are assumed to be files
10
- """
11
- if not paths:
12
- raise ValueError("No paths provided")
13
-
14
- # Get all directory prefixes
15
- prefix_set = set()
16
- for p in paths:
17
- parts = p.split('/')
18
- for i in range(1, len(parts)):
19
- prefix_set.add('/'.join(parts[:i]))
20
-
21
- # Sort by depth so parents are created before children
22
- sorted_paths = sorted(paths, key=lambda x: x.count('/'))
23
-
24
- # Create root folder
25
- root_name = sorted_paths[0].split('/')[0]
26
- root = Folder(name=root_name, type="folder", items=[])
27
-
28
- # Build the tree
29
- for p in sorted_paths:
30
- parts = p.split('/')
31
- current = root
32
- for idx, part in enumerate(parts[1:], start=1):
33
- full_path = '/'.join(parts[:idx+1])
34
- is_last = idx == len(parts) - 1
35
-
36
- # existing item
37
- existing = next((item for item in current.items if item.name == part), None)
38
- if existing:
39
- if isinstance(existing, Folder):
40
- current = existing
41
- continue
42
-
43
- # Determine type for new item
44
- if is_last and full_path not in prefix_set:
45
- new_item = File(name=part, type="file")
46
- else:
47
- new_item = Folder(name=part, type="folder", items=[])
48
-
49
- current.items.append(new_item)
50
- if isinstance(new_item, Folder):
51
- current = new_item
52
-
53
- return root
54
-
55
-
56
- def render_tree(folder: Folder) -> str:
57
- """
58
- Render Folder tree using ASCII tree characters (├──, └──, │).
59
- """
60
- lines: List[str] = []
61
-
62
- def recurse(node: Union[Folder, File], prefix: str, is_last: bool):
63
- connector = "└── " if is_last else "├── "
64
- lines.append(f"{prefix}{connector}{node.name}")
65
- if isinstance(node, Folder):
66
- child_prefix = prefix + (" " if is_last else "│ ")
67
- for idx, child in enumerate(node.items):
68
- recurse(child, child_prefix, idx == len(node.items) - 1)
69
-
70
- # root
71
- lines.append(folder.name)
72
- for idx, child in enumerate(folder.items):
73
- recurse(child, "", idx == len(folder.items) - 1)
74
-
75
- return "\n".join(lines)
76
-
77
-
78
- def flatten_folder(folder: Folder, parent_path: str = "") -> List[str]:
79
- """
80
- Traverses a Folder and returns a list of file paths.
81
- Each path is constructed by joining folder and file names with '/'.
82
- """
83
- paths: List[str] = []
84
- # Build the path for the current folder
85
- current_path = f"{parent_path}/{folder.name}" if parent_path else folder.name
86
-
87
- for item in folder.items:
88
- if item.type == "file":
89
- paths.append(f"{current_path}/{item.name}")
90
- else:
91
- # Recursively flatten subfolders
92
- paths.extend(flatten_folder(item, current_path))
93
- return paths
94
-
95
-