hamtaa-texttools 1.0.6__py3-none-any.whl → 1.0.8__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,17 +1,11 @@
-from __future__ import annotations
-
-import math
-import re
 from typing import Any, TypeVar, Type, Literal
-import json
 import logging
 
 from openai import OpenAI
 from pydantic import BaseModel
 
-from texttools.formatters.user_merge_formatter import (
-    UserMergeFormatter,
-)
+from texttools.tools.internals.base_operator import BaseOperator
+from texttools.tools.internals.formatters import Formatter
 from texttools.tools.internals.prompt_loader import PromptLoader
 
 # Base Model type for output models
@@ -22,79 +16,46 @@ logger = logging.getLogger("operator")
 logger.setLevel(logging.INFO)
 
 
-class Operator:
+class Operator(BaseOperator):
     """
-    Core engine for running text-processing operations with an LLM.
+    Core engine for running text-processing operations with an LLM (Sync).
 
     It wires together:
     - `PromptLoader` → loads YAML prompt templates.
     - `UserMergeFormatter` → applies formatting to messages (e.g., merging).
     - OpenAI client → executes completions/parsed completions.
-
-    Workflow inside `run()`:
-    1. Load prompt templates (`main_template` [+ `analyze_template` if enabled]).
-    2. Optionally generate an "analysis" step via `_analyze()`.
-    3. Build messages for the LLM.
-    4. Call `.beta.chat.completions.parse()` to parse the result into the
-       configured `OUTPUT_MODEL` (a Pydantic schema).
-    5. Return results as a dict (always `{"result": ...}`, plus `analysis`
-       if analysis was enabled).
-
-    Attributes configured dynamically by `TheTool`:
-    - PROMPT_FILE: str → YAML filename
-    - OUTPUT_MODEL: Pydantic model class
-    - WITH_ANALYSIS: bool → whether to run an analysis phase first
-    - USE_MODES: bool → whether to select prompts by mode
-    - MODE: str → which mode to use if modes are enabled
-    - RESP_FORMAT: str → "vllm" or "parse"
     """
 
-    def __init__(self, client: OpenAI):
-        self.client: OpenAI = client
+    def __init__(self, client: OpenAI, model: str):
+        self.client = client
+        self.model = model
 
-    def _build_user_message(self, prompt: str) -> dict[str, str]:
-        return {"role": "user", "content": prompt}
-
-    def _analysis_completion(
-        self,
-        analyze_message: list[dict[str, str]],
-        model: str,
-        temperature: float,
-    ) -> str:
+    def _analyze(self, prompt_configs: dict[str, str], temperature: float) -> str:
+        analyze_prompt = prompt_configs["analyze_template"]
+        analyze_message = [self._build_user_message(analyze_prompt)]
         completion = self.client.chat.completions.create(
-            model=model,
+            model=self.model,
             messages=analyze_message,
             temperature=temperature,
         )
         analysis = completion.choices[0].message.content.strip()
         return analysis
 
-    def _analyze(
-        self,
-        prompt_configs: dict[str, str],
-        model: str,
-        temperature: float,
-    ) -> str:
-        analyze_prompt = prompt_configs["analyze_template"]
-        analyze_message = [self._build_user_message(analyze_prompt)]
-        analysis = self._analysis_completion(analyze_message, model, temperature)
-        return analysis
-
     def _parse_completion(
         self,
         message: list[dict[str, str]],
         output_model: Type[T],
-        model: str,
         temperature: float,
         logprobs: bool = False,
         top_logprobs: int = 3,
     ) -> tuple[Type[T], Any]:
         request_kwargs = {
-            "model": model,
+            "model": self.model,
             "messages": message,
             "response_format": output_model,
             "temperature": temperature,
         }
+
         if logprobs:
             request_kwargs["logprobs"] = True
             request_kwargs["top_logprobs"] = top_logprobs
@@ -103,48 +64,10 @@ class Operator:
         parsed = completion.choices[0].message.parsed
         return parsed, completion
 
-    def _clean_json_response(self, response: str) -> str:
-        """
-        Clean JSON response by removing code block markers and whitespace.
-        Handles cases like:
-        - ```json{"result": "value"}```
-        """
-        stripped = response.strip()
-        cleaned = re.sub(r"^```(?:json)?\s*", "", stripped)
-        cleaned = re.sub(r"\s*```$", "", cleaned)
-
-        return cleaned.strip()
-
-    def _convert_to_output_model(
-        self, response_string: str, output_model: Type[T]
-    ) -> Type[T]:
-        """
-        Convert a JSON response string to output model.
-
-        Args:
-            response_string: The JSON string (may contain code block markers)
-            output_model: Your Pydantic output model class (e.g., StrOutput, ListStrOutput)
-
-        Returns:
-            Instance of your output model
-        """
-        # Clean the response string
-        cleaned_json = self._clean_json_response(response_string)
-
-        # Fix Python-style booleans
-        cleaned_json = cleaned_json.replace("False", "false").replace("True", "true")
-
-        # Convert string to Python dictionary
-        response_dict = json.loads(cleaned_json)
-
-        # Convert dictionary to output model
-        return output_model(**response_dict)
-
     def _vllm_completion(
         self,
         message: list[dict[str, str]],
         output_model: Type[T],
-        model: str,
         temperature: float,
         logprobs: bool = False,
         top_logprobs: int = 3,
@@ -153,7 +76,7 @@ class Operator:
 
         # Build kwargs dynamically
         request_kwargs = {
-            "model": model,
+            "model": self.model,
             "messages": message,
             "extra_body": {"guided_json": json_schema},
             "temperature": temperature,
@@ -170,73 +93,33 @@ class Operator:
         parsed = self._convert_to_output_model(response, output_model)
         return parsed, completion
 
-    def _extract_logprobs(self, completion: dict):
-        logprobs_data = []
-        ignore_pattern = re.compile(r'^(result|[\s\[\]\{\}",:]+)$')
-
-        for choice in completion.choices:
-            if not getattr(choice, "logprobs", None):
-                logger.info("No logprobs found.")
-                continue
-
-            for logprob_item in choice.logprobs.content:
-                if ignore_pattern.match(logprob_item.token):
-                    continue
-                token_entry = {
-                    "token": logprob_item.token,
-                    "prob": round(math.exp(logprob_item.logprob), 8),
-                    "top_alternatives": [],
-                }
-                for alt in logprob_item.top_logprobs:
-                    if ignore_pattern.match(alt.token):
-                        continue
-                    token_entry["top_alternatives"].append(
-                        {
-                            "token": alt.token,
-                            "prob": round(math.exp(alt.logprob), 8),
-                        }
-                    )
-                logprobs_data.append(token_entry)
-
-        return logprobs_data
-
     def run(
         self,
-        text: str,
         # User parameters
-        model: str,
+        text: str,
         with_analysis: bool,
+        output_lang: str | None,
+        user_prompt: str | None,
         temperature: float,
         logprobs: bool,
-        top_logprobs: int,
-        user_prompt: str | None,
-        output_lang: str | None,
-        # Each tool's parameters
+        top_logprobs: int | None,
+        # Internal parameters
         prompt_file: str,
         output_model: Type[T],
-        resp_format: Literal["vllm", "parse"] = "parse",
-        mode: str | None = None,
+        resp_format: Literal["vllm", "parse"],
+        mode: str | None,
         **extra_kwargs,
     ) -> dict[str, Any]:
         """
         Execute the LLM pipeline with the given input text.
-
-        Args:
-            text: The text to process (will be stripped of whitespace)
-            **extra_kwargs: Additional variables to inject into prompt templates
-
-        Returns:
-            Dictionary containing the parsed result and optional analysis
         """
         prompt_loader = PromptLoader()
-        formatter = UserMergeFormatter()
+        formatter = Formatter()
 
         try:
-            cleaned_text = text.strip()
-
             prompt_configs = prompt_loader.load(
                 prompt_file=prompt_file,
-                text=cleaned_text,
+                text=text.strip(),
                 mode=mode,
                 **extra_kwargs,
             )
@@ -244,7 +127,7 @@ class Operator:
             messages: list[dict[str, str]] = []
 
             if with_analysis:
-                analysis = self._analyze(prompt_configs, model, temperature)
+                analysis = self._analyze(prompt_configs, temperature)
                 messages.append(
                     self._build_user_message(f"Based on this analysis: {analysis}")
                 )
@@ -262,16 +145,15 @@ class Operator:
                 )
 
             messages.append(self._build_user_message(prompt_configs["main_template"]))
-
-            messages = formatter.format(messages)
+            messages = formatter.user_merge_format(messages)
 
             if resp_format == "vllm":
                 parsed, completion = self._vllm_completion(
-                    messages, output_model, model, temperature, logprobs, top_logprobs
+                    messages, output_model, temperature, logprobs, top_logprobs
                 )
             elif resp_format == "parse":
                 parsed, completion = self._parse_completion(
-                    messages, output_model, model, temperature, logprobs, top_logprobs
+                    messages, output_model, temperature, logprobs, top_logprobs
                 )
 
             # Ensure output_model has a `result` field
@@ -280,16 +162,16 @@ class Operator:
                     "The provided output_model must define a field named 'result'"
                 )
 
-            results = {"result": parsed.result}
+            result = {"result": parsed.result}
 
             if logprobs:
-                results["logprobs"] = self._extract_logprobs(completion)
+                result["logprobs"] = self._extract_logprobs(completion)
 
             if with_analysis:
-                results["analysis"] = analysis
+                result["analysis"] = analysis
 
-            return results
+            return result
 
         except Exception as e:
             logger.error(f"TheTool failed: {e}")
-            return {"Error": str(e), "result": ""}
+            return {"error": str(e), "result": ""}
@@ -18,24 +18,15 @@ class PromptLoader:
         }
     """
 
-    def __init__(self):
-        self.base_dir = Path(__file__).parent.parent.parent / Path("prompts")
-
     MAIN_TEMPLATE: str = "main_template"
     ANALYZE_TEMPLATE: str = "analyze_template"
 
     # Use lru_cache to load each file once
    @lru_cache(maxsize=32)
     def _load_templates(self, prompt_file: str, mode: str | None) -> dict[str, str]:
-        prompt_path = self.base_dir / prompt_file
-
-        if not prompt_path.exists():
-            raise FileNotFoundError(f"Prompt file not found: {prompt_path}")
-
-        try:
-            data = yaml.safe_load(prompt_path.read_text(encoding="utf-8"))
-        except yaml.YAMLError as e:
-            raise ValueError(f"Invalid YAML in {prompt_path}: {e}")
+        base_dir = Path(__file__).parent.parent.parent / Path("prompts")
+        prompt_path = base_dir / prompt_file
+        data = yaml.safe_load(prompt_path.read_text(encoding="utf-8"))
 
         return {
             self.MAIN_TEMPLATE: data[self.MAIN_TEMPLATE][mode]
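
The accessor data[self.MAIN_TEMPLATE][mode] suggests each prompt YAML stores its main template keyed by mode. Below is a rough sketch of the parsed structure _load_templates appears to expect, written as the Python dict yaml.safe_load would return; the mode name, template text, and the flat shape of analyze_template are assumptions for illustration only.

# Hypothetical parsed prompt file (keys taken from the loader constants; values invented).
data = {
    "main_template": {
        "default": "Process the following text: {text}",  # looked up as data["main_template"][mode]
    },
    "analyze_template": "First analyze the following text: {text}",  # shape assumed; used when with_analysis is enabled
}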