hamtaa-texttools 1.0.6__py3-none-any.whl → 1.0.8__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,17 +1,11 @@
-from __future__ import annotations
-
-import json
-import math
-import re
-from typing import Any, Literal, TypeVar
+from typing import Any, TypeVar, Type, Literal
 import logging
 
 from openai import AsyncOpenAI
 from pydantic import BaseModel
 
-from texttools.formatters.user_merge_formatter import (
-    UserMergeFormatter,
-)
+from texttools.tools.internals.base_operator import BaseOperator
+from texttools.tools.internals.formatters import Formatter
 from texttools.tools.internals.prompt_loader import PromptLoader
 
 # Base Model type for output models
@@ -22,238 +16,118 @@ logger = logging.getLogger("async_operator")
 logger.setLevel(logging.INFO)
 
 
-class AsyncOperator:
+class AsyncOperator(BaseOperator):
     """
-    Async version of Operator.
+    Core engine for running text-processing operations with an LLM (Async).
 
-    Behaves like the synchronous Operator but uses AsyncOpenAI and async/await.
+    It wires together:
+    - `PromptLoader` → loads YAML prompt templates.
+    - `UserMergeFormatter` → applies formatting to messages (e.g., merging).
+    - AsyncOpenAI client → executes completions/parsed completions.
     """
 
-    def __init__(
-        self,
-        client: AsyncOpenAI,
-        *,
-        model: str,
-        temperature: float = 0.0,
-        **client_kwargs: Any,
-    ):
-        self.client: AsyncOpenAI = client
+    def __init__(self, client: AsyncOpenAI, model: str):
+        self.client = client
         self.model = model
-        self.temperature = temperature
-        self.client_kwargs = client_kwargs
-
-    def _build_user_message(self, prompt: str) -> dict[str, str]:
-        return {"role": "user", "content": prompt}
-
-    async def _analysis_completion(self, analyze_message: list[dict[str, str]]) -> str:
-        try:
-            completion = await self.client.chat.completions.create(
-                model=self.model,
-                messages=analyze_message,
-                temperature=self.temperature,
-                **self.client_kwargs,
-            )
-            analysis = completion.choices[0].message.content.strip()
-            return analysis
-
-        except Exception as e:
-            print(f"[ERROR] Analysis failed: {e}")
-            raise
 
-    async def _analyze(self, prompt_configs: dict[str, str]) -> str:
+    async def _analyze(self, prompt_configs: dict[str, str], temperature: float) -> str:
         analyze_prompt = prompt_configs["analyze_template"]
         analyze_message = [self._build_user_message(analyze_prompt)]
-        analysis = await self._analysis_completion(analyze_message)
-
+        completion = await self.client.chat.completions.create(
+            model=self.model,
+            messages=analyze_message,
+            temperature=temperature,
+        )
+        analysis = completion.choices[0].message.content.strip()
         return analysis
 
     async def _parse_completion(
         self,
         message: list[dict[str, str]],
-        output_model: T,
+        output_model: Type[T],
+        temperature: float,
         logprobs: bool = False,
         top_logprobs: int = 3,
-        max_tokens: int | None = None,
-    ) -> tuple[T, Any]:
-        try:
-            request_kwargs = {
-                "model": self.model,
-                "messages": message,
-                "response_format": output_model,
-                "temperature": self.temperature,
-                **self.client_kwargs,
-            }
-
-            if max_tokens is not None:
-                request_kwargs["max_tokens"] = max_tokens
-
-            if logprobs:
-                request_kwargs["logprobs"] = True
-                request_kwargs["top_logprobs"] = top_logprobs
-
-            completion = await self.client.beta.chat.completions.parse(**request_kwargs)
-            parsed = completion.choices[0].message.parsed
-            return parsed, completion
-
-        except Exception as e:
-            print(f"[ERROR] Failed to parse completion: {e}")
-            raise
-
-    def _clean_json_response(self, response: str) -> str:
-        """
-        Clean JSON response by removing code block markers and whitespace.
-        Handles cases like:
-        - ```json{"result": "value"}```
-        """
-        cleaned = response.strip()
-
-        # Remove ```json marker
-        if cleaned.startswith("```json"):
-            cleaned = cleaned[7:]
-
-        # Remove trailing ```
-        if cleaned.endswith("```"):
-            cleaned = cleaned[:-3]
-
-        return cleaned.strip()
-
-    def _convert_to_output_model(self, response_string: str, output_model: T) -> T:
-        """
-        Convert a JSON response string to output model.
-
-        Args:
-            response_string: The JSON string (may contain code block markers)
-            output_model: Your Pydantic output model class (e.g., StrOutput, ListStrOutput)
-
-        Returns:
-            Instance of your output model
-        """
-        try:
-            # Clean the response string
-            cleaned_json = self._clean_json_response(response_string)
-
-            # Fix Python-style booleans
-            cleaned_json = cleaned_json.replace("False", "false").replace(
-                "True", "true"
-            )
-
-            # Convert string to Python dictionary
-            response_dict = json.loads(cleaned_json)
-
-            # Convert dictionary to output model
-            return output_model(**response_dict)
-
-        except json.JSONDecodeError as e:
-            raise ValueError(
-                f"Failed to parse JSON response: {e}\nResponse: {response_string}"
-            )
-        except Exception as e:
-            raise ValueError(f"Failed to convert to output model: {e}")
+    ) -> tuple[Type[T], Any]:
+        request_kwargs = {
+            "model": self.model,
+            "messages": message,
+            "response_format": output_model,
+            "temperature": temperature,
+        }
+
+        if logprobs:
+            request_kwargs["logprobs"] = True
+            request_kwargs["top_logprobs"] = top_logprobs
+
+        completion = await self.client.beta.chat.completions.parse(**request_kwargs)
+        parsed = completion.choices[0].message.parsed
+        return parsed, completion
 
     async def _vllm_completion(
         self,
         message: list[dict[str, str]],
-        output_model: T,
+        output_model: Type[T],
+        temperature: float,
         logprobs: bool = False,
         top_logprobs: int = 3,
-        max_tokens: int | None = None,
-    ) -> tuple[T, Any]:
-        try:
-            json_schema = output_model.model_json_schema()
-
-            # Build kwargs dynamically
-            request_kwargs = {
-                "model": self.model,
-                "messages": message,
-                "extra_body": {"guided_json": json_schema},
-                "temperature": self.temperature,
-                **self.client_kwargs,
-            }
-
-            if max_tokens is not None:
-                request_kwargs["max_tokens"] = max_tokens
-
-            if logprobs:
-                request_kwargs["logprobs"] = True
-                request_kwargs["top_logprobs"] = top_logprobs
+    ) -> tuple[Type[T], Any]:
+        json_schema = output_model.model_json_schema()
 
-            completion = await self.client.chat.completions.create(**request_kwargs)
-            response = completion.choices[0].message.content
+        # Build kwargs dynamically
+        request_kwargs = {
+            "model": self.model,
+            "messages": message,
+            "extra_body": {"guided_json": json_schema},
+            "temperature": temperature,
+        }
 
-            # Convert the string response to output model
-            parsed = self._convert_to_output_model(response, output_model)
+        if logprobs:
+            request_kwargs["logprobs"] = True
+            request_kwargs["top_logprobs"] = top_logprobs
 
-            return parsed, completion
+        completion = await self.client.chat.completions.create(**request_kwargs)
+        response = completion.choices[0].message.content
 
-        except Exception as e:
-            print(f"[ERROR] Failed to get vLLM structured output: {e}")
-            raise
-
-    def _extract_logprobs(self, completion: dict):
-        logprobs_data = []
-        ignore_pattern = re.compile(r'^(result|[\s\[\]\{\}",:]+)$')
-
-        for choice in completion.choices:
-            if not getattr(choice, "logprobs", None):
-                logger.info("No logprobs found.")
-                continue
-
-            for logprob_item in choice.logprobs.content:
-                if ignore_pattern.match(logprob_item.token):
-                    continue
-                token_entry = {
-                    "token": logprob_item.token,
-                    "prob": round(math.exp(logprob_item.logprob), 8),
-                    "top_alternatives": [],
-                }
-                for alt in logprob_item.top_logprobs:
-                    if ignore_pattern.match(alt.token):
-                        continue
-                    token_entry["top_alternatives"].append(
-                        {
-                            "token": alt.token,
-                            "prob": round(math.exp(alt.logprob), 8),
-                        }
-                    )
-                logprobs_data.append(token_entry)
-
-        return logprobs_data
+        # Convert the string response to output model
+        parsed = self._convert_to_output_model(response, output_model)
+        return parsed, completion
 
     async def run(
         self,
-        input_text: str,
+        # User parameters
+        text: str,
+        with_analysis: bool,
+        output_lang: str | None,
+        user_prompt: str | None,
+        temperature: float,
+        logprobs: bool,
+        top_logprobs: int | None,
+        # Internal parameters
         prompt_file: str,
-        output_model: T,
-        with_analysis: bool = False,
-        use_modes: bool = False,
-        mode: str = "",
-        resp_format: Literal["vllm", "parse"] = "parse",
-        output_lang: str | None = None,
-        logprobs: bool = False,
-        top_logprobs: int = 3,
-        max_tokens: int | None = None,
+        output_model: Type[T],
+        resp_format: Literal["vllm", "parse"],
+        mode: str | None,
         **extra_kwargs,
     ) -> dict[str, Any]:
         """
-        Execute the async LLM pipeline with the given input text.
+        Execute the async LLM pipeline with the given input text. (Async)
         """
         prompt_loader = PromptLoader()
-        formatter = UserMergeFormatter()
+        formatter = Formatter()
 
         try:
-            cleaned_text = input_text.strip()
-
             prompt_configs = prompt_loader.load(
                 prompt_file=prompt_file,
-                text=cleaned_text,
-                mode=mode if use_modes else "",
+                text=text.strip(),
+                mode=mode,
                 **extra_kwargs,
             )
 
             messages: list[dict[str, str]] = []
 
             if with_analysis:
-                analysis = await self._analyze(prompt_configs)
+                analysis = await self._analyze(prompt_configs, temperature)
                 messages.append(
                     self._build_user_message(f"Based on this analysis: {analysis}")
                 )
@@ -265,27 +139,22 @@ class AsyncOperator:
                     )
                 )
 
+            if user_prompt:
+                messages.append(
+                    self._build_user_message(f"Consider this instruction {user_prompt}")
+                )
+
             messages.append(self._build_user_message(prompt_configs["main_template"]))
-            messages = formatter.format(messages)
+            messages = formatter.user_merge_format(messages)
 
             if resp_format == "vllm":
                 parsed, completion = await self._vllm_completion(
-                    messages,
-                    output_model,
-                    logprobs,
-                    top_logprobs,
-                    max_tokens,
+                    messages, output_model, temperature, logprobs, top_logprobs
                 )
             elif resp_format == "parse":
-                parsed, completion = await self._parse_completion(
-                    messages,
-                    output_model,
-                    logprobs,
-                    top_logprobs,
-                    max_tokens,
+                parsed, completion = await self._vllm_completion(
+                    messages, output_model, temperature, logprobs, top_logprobs
                 )
-            else:
-                logger.error(f"Unknown resp_format: {resp_format}")
 
             # Ensure output_model has a `result` field
             if not hasattr(parsed, "result"):
@@ -305,4 +174,4 @@ class AsyncOperator:
 
         except Exception as e:
             logger.error(f"Async TheTool failed: {e}")
-            return {"Error": str(e), "result": ""}
+            return {"error": str(e), "result": ""}
@@ -0,0 +1,85 @@
+from typing import TypeVar, Type, Any
+import json
+import re
+import math
+
+from pydantic import BaseModel
+from openai import OpenAI, AsyncOpenAI
+
+# Base Model type for output models
+T = TypeVar("T", bound=BaseModel)
+
+
+class BaseOperator:
+    def __init__(self, client: OpenAI | AsyncOpenAI, model: str):
+        self.client = client
+        self.model = model
+
+    def _build_user_message(self, prompt: str) -> dict[str, str]:
+        return {"role": "user", "content": prompt}
+
+    def _clean_json_response(self, response: str) -> str:
+        """
+        Clean JSON response by removing code block markers and whitespace.
+        Handles cases like:
+        - ```json{"result": "value"}```
+        """
+        stripped = response.strip()
+        cleaned = re.sub(r"^```(?:json)?\s*", "", stripped)
+        cleaned = re.sub(r"\s*```$", "", cleaned)
+
+        return cleaned.strip()
+
+    def _convert_to_output_model(
+        self, response_string: str, output_model: Type[T]
+    ) -> Type[T]:
+        """
+        Convert a JSON response string to output model.
+
+        Args:
+            response_string: The JSON string (may contain code block markers)
+            output_model: Your Pydantic output model class (e.g., StrOutput, ListStrOutput)
+
+        Returns:
+            Instance of your output model
+        """
+        # Clean the response string
+        cleaned_json = self._clean_json_response(response_string)
+
+        # Fix Python-style booleans
+        cleaned_json = cleaned_json.replace("False", "false").replace("True", "true")
+
+        # Convert string to Python dictionary
+        response_dict = json.loads(cleaned_json)
+
+        # Convert dictionary to output model
+        return output_model(**response_dict)
+
+    def _extract_logprobs(self, completion: dict) -> list[dict[str, Any]]:
+        logprobs_data = []
+        ignore_pattern = re.compile(r'^(result|[\s\[\]\{\}",:]+)$')
+
+        for choice in completion.choices:
+            if not getattr(choice, "logprobs", None):
+                continue
+
+            for logprob_item in choice.logprobs.content:
+                if ignore_pattern.match(logprob_item.token):
+                    continue
+                token_entry = {
+                    "token": logprob_item.token,
+                    "prob": round(math.exp(logprob_item.logprob), 8),
+                    "top_alternatives": [],
+                }
+                for alt in logprob_item.top_logprobs:
+                    if ignore_pattern.match(alt.token):
+                        continue
+                    token_entry["top_alternatives"].append(
+                        {
+                            "token": alt.token,
+                            "prob": round(math.exp(alt.logprob), 8),
+                        }
+                    )
+                logprobs_data.append(token_entry)
+
+        return logprobs_data
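
The JSON-repair helpers that both operators now inherit from `BaseOperator` can be exercised on their own. A small hedged sketch follows; the `StrOutput` model, the placeholder API key, and the model name are assumptions, and the client is only stored, never called.

```python
# Hedged sketch of the BaseOperator JSON helpers shown above.
from openai import OpenAI
from pydantic import BaseModel

from texttools.tools.internals.base_operator import BaseOperator


class StrOutput(BaseModel):  # assumed example output model
    result: str


op = BaseOperator(client=OpenAI(api_key="sk-placeholder"), model="any-model")

raw = '```json\n{"result": "hello"}\n```'
print(op._clean_json_response(raw))                 # {"result": "hello"}
print(op._convert_to_output_model(raw, StrOutput))  # result='hello'
```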
@@ -0,0 +1,24 @@
+class Formatter:
+    @staticmethod
+    def user_merge_format(messages: list[dict[str, str]]) -> list[dict[str, str]]:
+        """
+        Merges consecutive user messages into a single message, separated by newlines.
+
+        This is useful for condensing a multi-turn user input into a single
+        message for the LLM. Assistant and system messages are left unchanged and
+        act as separators between user message groups.
+        """
+        merged: list[dict[str, str]] = []
+
+        for message in messages:
+            role, content = message["role"], message["content"].strip()
+
+            # Merge with previous user turn
+            if merged and role == "user" and merged[-1]["role"] == "user":
+                merged[-1]["content"] += "\n" + content
+
+            # Otherwise, start a new turn
+            else:
+                merged.append({"role": role, "content": content})
+
+        return merged
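
To illustrate the merging rule, here is a short sketch of `Formatter.user_merge_format` on a hypothetical message list: consecutive user turns collapse into one message, while the assistant turn acts as a separator.

```python
# Hedged sketch: the message contents below are made up for illustration.
from texttools.tools.internals.formatters import Formatter

messages = [
    {"role": "user", "content": "Based on this analysis: ..."},
    {"role": "user", "content": "Respond in English."},
    {"role": "assistant", "content": "Understood."},
    {"role": "user", "content": "Summarize the text."},
]

merged = Formatter.user_merge_format(messages)
# The first two user messages are joined with a newline; three turns remain.
assert len(merged) == 3
assert merged[0]["content"] == "Based on this analysis: ...\nRespond in English."
```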