hamtaa-texttools 1.1.16__py3-none-any.whl → 1.1.18__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -6,19 +6,24 @@ from pydantic import BaseModel, Field, create_model
 
 
 class ToolOutput(BaseModel):
     result: Any = None
-    analysis: str = ""
     logprobs: list[dict[str, Any]] = []
-    process: str = ""
+    analysis: str = ""
+    process: str | None = None
     processed_at: datetime = datetime.now()
-    execution_time: float = -1.0
+    execution_time: float | None = None
     errors: list[str] = []
 
     def __repr__(self) -> str:
-        return f"ToolOutput(process='{self.process}', result_type='{type(self.result)}', result='{self.result}', analysis='{self.analysis}', logprobs='{self.logprobs}', errors='{self.errors}', processed_at='{self.processed_at}', execution_time='{self.execution_time}')"
+        return f"""
+        ToolOutput(process='{self.process}', result_type='{type(self.result)}',
+        result='{self.result}', analysis='{self.analysis}',
+        logprobs='{self.logprobs}', errors='{self.errors}',
+        processed_at='{self.processed_at}', execution_time='{self.execution_time}')
+        """
 
 
 class StrOutput(BaseModel):
-    result: str = Field(..., description="The output string")
+    result: str = Field(..., description="The output string", example="text")
 
 
 class BoolOutput(BaseModel):
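
Note: process and execution_time previously defaulted to the sentinels "" and -1.0 and are now Optional, defaulting to None, so downstream checks against the old sentinels need updating. A minimal consumer sketch; the some_tool.run call is hypothetical, only the field types come from this diff:

    output = some_tool.run("...")  # assumed to return a ToolOutput
    # old sentinel check was: output.execution_time >= 0
    if output.execution_time is not None:
        print(f"finished in {output.execution_time:.2f}s")
    if output.process is not None:
        print(f"process: {output.process}")
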
@@ -37,13 +42,15 @@ class ListDictStrStrOutput(BaseModel):
     result: list[dict[str, str]] = Field(
         ...,
         description="List of dictionaries containing string key-value pairs",
-        example=[{"text": "Mohammad", "type": "PER"}],
+        example=[{"text": "Mohammad", "type": "PER"}, {"text": "Iran", "type": "LOC"}],
     )
 
 
 class ReasonListStrOutput(BaseModel):
     reason: str = Field(..., description="Thinking process that led to the output")
-    result: list[str] = Field(..., description="The output list of strings")
+    result: list[str] = Field(
+        ..., description="The output list of strings", example=["text_1", "text_2"]
+    )
 
 
 class Node(BaseModel):
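
Note: example= is not a declared Field() parameter; pydantic v1 stored unknown keyword arguments in the field schema, while pydantic v2 deprecates them in favor of json_schema_extra (or the examples parameter). If the package targets v2, an equivalent spelling would be the sketch below; the version assumption is mine, not the diff's:

    from pydantic import BaseModel, Field

    class ReasonListStrOutput(BaseModel):
        reason: str = Field(..., description="Thinking process that led to the output")
        result: list[str] = Field(
            ...,
            description="The output list of strings",
            json_schema_extra={"example": ["text_1", "text_2"]},
        )
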
@@ -51,14 +58,44 @@ class Node(BaseModel):
     name: str
     level: int
     parent_id: int | None
-    description: str = "No description provided"
+    description: str
 
 
 class CategoryTree:
     def __init__(self, tree_name):
-        self.root = Node(node_id=0, name=tree_name, level=0, parent_id=None)
-        self.all_nodes: list[Node] = [self.root]
-        self.new_id = 1
+        self._root = Node(
+            node_id=0, name=tree_name, level=0, parent_id=None, description="Root node"
+        )
+        self._all_nodes: list[Node] = [self._root]
+        self._new_id = 1
+
+    def get_all_nodes(self) -> list[Node]:
+        return self._all_nodes
+
+    def get_level_count(self) -> int:
+        return max([item.level for item in self._all_nodes])
+
+    def get_node(self, identifier: int | str) -> Node | None:
+        if isinstance(identifier, str):
+            for node in self.get_all_nodes():
+                if node.name == identifier:
+                    return node
+            return None
+        elif isinstance(identifier, int):
+            for node in self.get_all_nodes():
+                if node.node_id == identifier:
+                    return node
+            return None
+        else:
+            return None
+
+    def get_children(self, parent_node: Node) -> list[Node] | None:
+        children = [
+            node
+            for node in self.get_all_nodes()
+            if parent_node.node_id == node.parent_id
+        ]
+        return children if children else None
 
     def add_node(
         self,
@@ -66,12 +103,12 @@ class CategoryTree:
         parent_name: str | None = None,
         description: str | None = None,
     ) -> None:
-        if self.find_node(node_name):
+        if self.get_node(node_name):
            raise ValueError(f"{node_name} has been chosen for another category before")
 
        if parent_name:
-            parent_node = self.find_node(parent_name)
-            if parent_node is None:
+            parent_node = self.get_node(parent_name)
+            if not parent_node:
                raise ValueError(f"Parent category '{parent_name}' not found")
            parent_id = parent_node.node_id
            level = parent_node.level + 1
@@ -80,61 +117,31 @@ class CategoryTree:
             parent_id = 0
 
         node_data = {
-            "node_id": self.new_id,
+            "node_id": self._new_id,
             "name": node_name,
             "level": level,
             "parent_id": parent_id,
+            "description": description if description else "No description provided",
         }
 
-        if description is not None:
-            node_data["description"] = description
-
-        self.all_nodes.append(Node(**node_data))
-        self.new_id += 1
-
-    def get_nodes(self) -> list[Node]:
-        return self.all_nodes
-
-    def get_level_count(self) -> int:
-        return max([item.level for item in self.all_nodes])
-
-    def find_node(self, identifier: int | str) -> Node | None:
-        if isinstance(identifier, str):
-            for node in self.get_nodes():
-                if node.name == identifier:
-                    return node
-            return None
-        elif isinstance(identifier, int):
-            for node in self.get_nodes():
-                if node.node_id == identifier:
-                    return node
-            return None
-        else:
-            return None
-
-    def find_children(self, parent_node: Node) -> list[Node] | None:
-        children = [
-            node for node in self.get_nodes() if parent_node.node_id == node.parent_id
-        ]
-        return children if children else None
+        self._all_nodes.append(Node(**node_data))
+        self._new_id += 1
 
     def remove_node(self, identifier: int | str) -> None:
-        node = self.find_node(identifier)
+        node = self.get_node(identifier)
 
-        if node is not None:
+        if node:
             # Remove node's children recursively
-            children = self.find_children(node)
+            children = self.get_children(node)
 
-            # Ending condition
-            if children is None:
-                self.all_nodes.remove(node)
+            if not children:
+                self._all_nodes.remove(node)
                 return
 
             for child in children:
                 self.remove_node(child.name)
 
-            # Remove the node from tree
-            self.all_nodes.remove(node)
+            self._all_nodes.remove(node)
         else:
             raise ValueError(f"Node with identifier: '{identifier}' not found.")
 
@@ -142,7 +149,7 @@ class CategoryTree:
         def build_dict(node: Node) -> dict:
             children = [
                 build_dict(child)
-                for child in self.all_nodes
+                for child in self._all_nodes
                 if child.parent_id == node.node_id
             ]
             return {
@@ -153,7 +160,7 @@ class CategoryTree:
                 "children": children,
             }
 
-        return {"category_tree": build_dict(self.root)["children"]}
+        return {"category_tree": build_dict(self._root)["children"]}
 
 
 # This function is needed to create CategorizerOutput with dynamic categories
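
The CategoryTree changes above amount to a breaking rename: get_nodes, find_node, and find_children become get_all_nodes, get_node, and get_children, and the root, all_nodes, and new_id attributes go private. Node.description is also required now, though add_node still fills in a default. A short migration sketch for external callers (the category names are invented):

    tree = CategoryTree("topics")
    tree.add_node("Sports")             # unchanged
    node = tree.get_node("Sports")      # was: tree.find_node("Sports")
    children = tree.get_children(node)  # was: tree.find_children(node)
    all_nodes = tree.get_all_nodes()    # was: tree.get_nodes()
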
@@ -0,0 +1,80 @@
+from functools import lru_cache
+from pathlib import Path
+import yaml
+
+from texttools.internals.exceptions import PromptError
+
+
+class PromptLoader:
+    """
+    Utility for loading and formatting YAML prompt templates.
+
+    Responsibilities:
+    - Load and parse YAML prompt definitions.
+    - Select the right template (by mode, if applicable).
+    - Inject variables (`{input}`, plus any extra kwargs) into the templates.
+    """
+
+    MAIN_TEMPLATE = "main_template"
+    ANALYZE_TEMPLATE = "analyze_template"
+
+    @staticmethod
+    def _build_format_args(text: str, **extra_kwargs) -> dict[str, str]:
+        # Base formatting args
+        format_args = {"input": text}
+        # Merge extras
+        format_args.update(extra_kwargs)
+        return format_args
+
+    # Use lru_cache to load each file once
+    @lru_cache(maxsize=32)
+    def _load_templates(self, prompt_file: str, mode: str | None) -> dict[str, str]:
+        """
+        Loads prompt templates from a YAML file, with optional mode selection.
+        """
+        try:
+            base_dir = Path(__file__).parent.parent / Path("prompts")
+            prompt_path = base_dir / prompt_file
+
+            if not prompt_path.exists():
+                raise PromptError(f"Prompt file not found: {prompt_file}")
+
+            data = yaml.safe_load(prompt_path.read_text(encoding="utf-8"))
+
+            if self.MAIN_TEMPLATE not in data:
+                raise PromptError(f"Missing 'main_template' in {prompt_file}")
+
+            if mode and mode not in data.get(self.MAIN_TEMPLATE, {}):
+                raise PromptError(f"Mode '{mode}' not found in {prompt_file}")
+
+            return {
+                self.MAIN_TEMPLATE: data[self.MAIN_TEMPLATE][mode]
+                if mode and isinstance(data[self.MAIN_TEMPLATE], dict)
+                else data[self.MAIN_TEMPLATE],
+                self.ANALYZE_TEMPLATE: data.get(self.ANALYZE_TEMPLATE, {}).get(mode)
+                if mode and isinstance(data.get(self.ANALYZE_TEMPLATE), dict)
+                else data.get(self.ANALYZE_TEMPLATE, ""),
+            }
+
+        except yaml.YAMLError as e:
+            raise PromptError(f"Invalid YAML in {prompt_file}: {e}")
+        except Exception as e:
+            raise PromptError(f"Failed to load prompt {prompt_file}: {e}")
+
+    def load(
+        self, prompt_file: str, text: str, mode: str, **extra_kwargs
+    ) -> dict[str, str]:
+        try:
+            template_configs = self._load_templates(prompt_file, mode)
+            format_args = self._build_format_args(text, **extra_kwargs)
+
+            # Inject variables inside each template
+            for key in template_configs.keys():
+                template_configs[key] = template_configs[key].format(**format_args)
+
+            return template_configs
+
+        except KeyError as e:
+            raise PromptError(f"Missing template variable: {e}")
+        except Exception as e:
+            raise PromptError(f"Failed to format prompt: {e}")
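
One caveat with the new loader: @lru_cache on an instance method includes self in the cache key, so caching only pays off when a PromptLoader instance is reused; a fresh instance per call (as the operator below creates) always misses, and cached instances stay alive until evicted. A usage sketch under that assumption; the file name is a guess, and the file would need to live under texttools/prompts/:

    loader = PromptLoader()  # reuse one instance so the cache can hit
    configs = loader.load(
        prompt_file="propositions.yaml",  # hypothetical name
        text="The cat sat on the mat.",
        mode=None,  # no mode: the whole main_template is used as-is
    )
    print(configs["main_template"])
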
@@ -5,15 +5,21 @@ import logging
 from openai import OpenAI
 from pydantic import BaseModel
 
-from texttools.tools.internals.models import ToolOutput
-from texttools.tools.internals.operator_utils import OperatorUtils
-from texttools.tools.internals.formatters import Formatter
-from texttools.tools.internals.prompt_loader import PromptLoader
+from texttools.internals.models import ToolOutput
+from texttools.internals.operator_utils import OperatorUtils
+from texttools.internals.formatters import Formatter
+from texttools.internals.prompt_loader import PromptLoader
+from texttools.internals.exceptions import (
+    TextToolsError,
+    LLMError,
+    ValidationError,
+    PromptError,
+)
 
 # Base Model type for output models
 T = TypeVar("T", bound=BaseModel)
 
-logger = logging.getLogger("texttools.operator")
+logger = logging.getLogger("texttools.sync_operator")
 
 
 class Operator:
@@ -35,15 +41,33 @@ class Operator:
         Calls OpenAI API for analysis using the configured prompt template.
         Returns the analyzed content as a string.
         """
-        analyze_prompt = prompt_configs["analyze_template"]
-        analyze_message = [OperatorUtils.build_user_message(analyze_prompt)]
-        completion = self._client.chat.completions.create(
-            model=self._model,
-            messages=analyze_message,
-            temperature=temperature,
-        )
-        analysis = completion.choices[0].message.content.strip()
-        return analysis
+        try:
+            analyze_prompt = prompt_configs["analyze_template"]
+
+            if not analyze_prompt:
+                raise PromptError("Analyze template is empty")
+
+            analyze_message = [OperatorUtils.build_user_message(analyze_prompt)]
+            completion = self._client.chat.completions.create(
+                model=self._model,
+                messages=analyze_message,
+                temperature=temperature,
+            )
+
+            if not completion.choices:
+                raise LLMError("No choices returned from LLM")
+
+            analysis = completion.choices[0].message.content.strip()
+
+            if not analysis:
+                raise LLMError("Empty analysis response")
+
+            return analysis
+
+        except Exception as e:
+            if isinstance(e, (PromptError, LLMError)):
+                raise
+            raise LLMError(f"Analysis failed: {e}")
 
     def _parse_completion(
         self,
@@ -58,23 +82,35 @@
         Parses a chat completion using OpenAI's structured output format.
         Returns both the parsed object and the raw completion for logprobs.
         """
-        request_kwargs = {
-            "model": self._model,
-            "messages": message,
-            "response_format": output_model,
-            "temperature": temperature,
-        }
+        try:
+            request_kwargs = {
+                "model": self._model,
+                "messages": message,
+                "response_format": output_model,
+                "temperature": temperature,
+            }
+
+            if logprobs:
+                request_kwargs["logprobs"] = True
+                request_kwargs["top_logprobs"] = top_logprobs
+            if priority:
+                request_kwargs["extra_body"] = {"priority": priority}
+            completion = self._client.beta.chat.completions.parse(**request_kwargs)
+
+            if not completion.choices:
+                raise LLMError("No choices returned from LLM")
+
+            parsed = completion.choices[0].message.parsed
 
-        if logprobs:
-            request_kwargs["logprobs"] = True
-            request_kwargs["top_logprobs"] = top_logprobs
+            if not parsed:
+                raise LLMError("Failed to parse LLM response")
 
-        if priority:
-            request_kwargs["extra_body"] = {"priority": priority}
+            return parsed, completion
 
-        completion = self._client.beta.chat.completions.parse(**request_kwargs)
-        parsed = completion.choices[0].message.parsed
-        return parsed, completion
+        except Exception as e:
+            if isinstance(e, LLMError):
+                raise
+            raise LLMError(f"Completion failed: {e}")
 
     def run(
         self,
@@ -96,12 +132,13 @@
         **extra_kwargs,
     ) -> ToolOutput:
         """
-        Execute the LLM pipeline with the given input text.
+        Execute the LLM pipeline with the given input text. (Sync)
         """
-        prompt_loader = PromptLoader()
-        formatter = Formatter()
-        output = ToolOutput()
         try:
+            prompt_loader = PromptLoader()
+            formatter = Formatter()
+            output = ToolOutput()
+
             # Prompt configs contain two keys, main_template and analyze_template; both are strings
             prompt_configs = prompt_loader.load(
                 prompt_file=prompt_file,
@@ -140,6 +177,9 @@
 
             messages = formatter.user_merge_format(messages)
 
+            if logprobs and (not isinstance(top_logprobs, int) or top_logprobs < 2):
+                raise ValueError("top_logprobs should be an integer greater than 1")
+
             parsed, completion = self._parse_completion(
                 messages, output_model, temperature, logprobs, top_logprobs, priority
             )
@@ -148,6 +188,15 @@
 
             # Retry logic if validation fails
             if validator and not validator(output.result):
+                if (
+                    not isinstance(max_validation_retries, int)
+                    or max_validation_retries < 1
+                ):
+                    raise ValueError(
+                        "max_validation_retries should be a positive integer"
+                    )
+
+                succeeded = False
                 for attempt in range(max_validation_retries):
                     logger.warning(
                         f"Validation failed, retrying (attempt {attempt + 1})."
@@ -155,6 +204,7 @@
 
                     # Generate new temperature for retry
                     retry_temperature = OperatorUtils.get_retry_temp(temperature)
+
                     try:
                         parsed, completion = self._parse_completion(
                             messages,
@@ -162,28 +212,23 @@
                             retry_temperature,
                             logprobs,
                             top_logprobs,
+                            priority=priority,
                         )
 
                         output.result = parsed.result
 
                         # Check if retry was successful
                         if validator(output.result):
-                            logger.info(
-                                f"Validation passed on retry attempt {attempt + 1}"
-                            )
+                            succeeded = True
                             break
-                        else:
-                            logger.warning(
-                                f"Validation still failing after retry attempt {attempt + 1}"
-                            )
 
-                    except Exception as e:
+                    except LLMError as e:
                         logger.error(f"Retry attempt {attempt + 1} failed: {e}")
-                        # Continue to next retry attempt if this one fails
 
-                # Final check after all retries
-                if validator and not validator(output.result):
-                    output.errors.append("Validation failed after all retry attempts")
+                if not succeeded:
+                    raise ValidationError(
+                        f"Validation failed after {max_validation_retries} retries"
+                    )
 
             if logprobs:
                 output.logprobs = OperatorUtils.extract_logprobs(completion)
@@ -195,7 +240,7 @@
 
             return output
 
+        except (PromptError, LLMError, ValidationError):
+            raise
         except Exception as e:
-            logger.error(f"TheTool failed: {e}")
-            output.errors.append(str(e))
-            return output
+            raise TextToolsError(f"Unexpected error in operator: {e}")
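
With these changes, run() raises typed exceptions instead of returning a ToolOutput whose errors list records the failure, so callers that used to inspect output.errors need a try/except. A hedged caller sketch; the run(...) arguments are elided:

    from texttools.internals.exceptions import (
        TextToolsError, LLMError, ValidationError, PromptError,
    )

    try:
        output = operator.run(...)  # arguments elided
    except ValidationError:
        ...  # result failed the validator even after retries
    except (PromptError, LLMError):
        ...  # prompt loading or the LLM call failed
    except TextToolsError:
        ...  # any other texttools failure
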
@@ -0,0 +1,15 @@
+main_template: |
+  You are an expert in breaking text down into atomic propositions in the text's own language.
+  An atomic proposition is a single, self-contained fact that is concise, verifiable,
+  and does not rely on external context.
+  Each proposition must stand alone.
+  Rewrite sentences if needed so that each proposition keeps its context.
+  Extract the atomic propositions of this text:
+  {input}
+
+analyze_template: |
+  We want to analyze this text snippet and think about where we can split sentences into atomic, meaningful propositions.
+  An atomic proposition is a single, self-contained fact that is concise,
+  verifiable, and does not rely on external context.
+  Think about the possible propositions in the text and how each proposition can be formed.
+  {input}
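
This new prompt file supplies exactly the two keys PromptLoader expects, main_template and analyze_template, with no per-mode nesting, and both templates consume the {input} placeholder. A sketch of the contract, mirroring the loader's own safe_load-then-format steps; the file name is a guess:

    import yaml
    from pathlib import Path

    data = yaml.safe_load(Path("propositions.yaml").read_text(encoding="utf-8"))
    assert set(data) == {"main_template", "analyze_template"}
    prompt = data["main_template"].format(input="Water boils at 100 C at sea level.")
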