hamtaa-texttools 1.1.1__py3-none-any.whl → 1.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (47)
  1. hamtaa_texttools-1.2.0.dist-info/METADATA +212 -0
  2. hamtaa_texttools-1.2.0.dist-info/RECORD +34 -0
  3. texttools/__init__.py +6 -8
  4. texttools/batch/__init__.py +0 -4
  5. texttools/batch/config.py +40 -0
  6. texttools/batch/{batch_manager.py → manager.py} +41 -42
  7. texttools/batch/runner.py +228 -0
  8. texttools/core/__init__.py +0 -0
  9. texttools/core/engine.py +254 -0
  10. texttools/core/exceptions.py +22 -0
  11. texttools/core/internal_models.py +58 -0
  12. texttools/core/operators/async_operator.py +194 -0
  13. texttools/core/operators/sync_operator.py +192 -0
  14. texttools/models.py +88 -0
  15. texttools/prompts/categorize.yaml +36 -0
  16. texttools/prompts/check_fact.yaml +24 -0
  17. texttools/prompts/extract_entities.yaml +7 -3
  18. texttools/prompts/extract_keywords.yaml +80 -18
  19. texttools/prompts/is_question.yaml +6 -2
  20. texttools/prompts/merge_questions.yaml +12 -5
  21. texttools/prompts/propositionize.yaml +24 -0
  22. texttools/prompts/rewrite.yaml +9 -10
  23. texttools/prompts/run_custom.yaml +2 -2
  24. texttools/prompts/subject_to_question.yaml +7 -3
  25. texttools/prompts/summarize.yaml +6 -2
  26. texttools/prompts/text_to_question.yaml +12 -6
  27. texttools/prompts/translate.yaml +7 -2
  28. texttools/py.typed +0 -0
  29. texttools/tools/__init__.py +0 -4
  30. texttools/tools/async_tools.py +1093 -0
  31. texttools/tools/sync_tools.py +1092 -0
  32. hamtaa_texttools-1.1.1.dist-info/METADATA +0 -183
  33. hamtaa_texttools-1.1.1.dist-info/RECORD +0 -30
  34. texttools/batch/batch_runner.py +0 -263
  35. texttools/prompts/README.md +0 -35
  36. texttools/prompts/categorizer.yaml +0 -28
  37. texttools/tools/async_the_tool.py +0 -414
  38. texttools/tools/internals/async_operator.py +0 -179
  39. texttools/tools/internals/base_operator.py +0 -91
  40. texttools/tools/internals/formatters.py +0 -24
  41. texttools/tools/internals/operator.py +0 -179
  42. texttools/tools/internals/output_models.py +0 -59
  43. texttools/tools/internals/prompt_loader.py +0 -57
  44. texttools/tools/the_tool.py +0 -412
  45. {hamtaa_texttools-1.1.1.dist-info → hamtaa_texttools-1.2.0.dist-info}/WHEEL +0 -0
  46. {hamtaa_texttools-1.1.1.dist-info → hamtaa_texttools-1.2.0.dist-info}/licenses/LICENSE +0 -0
  47. {hamtaa_texttools-1.1.1.dist-info → hamtaa_texttools-1.2.0.dist-info}/top_level.txt +0 -0
@@ -1,414 +0,0 @@
- from typing import Literal, Any
-
- from openai import AsyncOpenAI
-
- from texttools.tools.internals.async_operator import AsyncOperator
- import texttools.tools.internals.output_models as OutputModels
-
-
- class AsyncTheTool:
-     """
-     Async counterpart to TheTool.
-
-     Each method configures the async operator with a specific YAML prompt,
-     output schema, and flags, then delegates execution to `operator.run()`.
-
-     Usage:
-         async_client = AsyncOpenAI(...)
-         tool = AsyncTheTool(async_client, model="model-name")
-         result = await tool.categorize("text ...", with_analysis=True)
-     """
-
-     def __init__(
-         self,
-         client: AsyncOpenAI,
-         model: str,
-     ):
-         self.operator = AsyncOperator(client=client, model=model)
-
-     async def categorize(
-         self,
-         text: str,
-         with_analysis: bool = False,
-         user_prompt: str | None = None,
-         temperature: float | None = 0.0,
-         logprobs: bool = False,
-         top_logprobs: int | None = None,
-     ) -> dict[str, str]:
-         """
-         Categorize a text into a single Islamic studies domain category.
-
-         Returns:
-             ToolOutput: Object containing:
-                 - result (str): The assigned Islamic studies category
-                 - logprobs (list | None): Probability data if logprobs enabled
-                 - analysis (str | None): Detailed reasoning if with_analysis enabled
-         """
-         return await self.operator.run(
-             # User parameters
-             text=text,
-             with_analysis=with_analysis,
-             user_prompt=user_prompt,
-             temperature=temperature,
-             logprobs=logprobs,
-             top_logprobs=top_logprobs,
-             # Internal parameters
-             prompt_file="categorizer.yaml",
-             output_model=OutputModels.CategorizerOutput,
-             resp_format="parse",
-             mode=None,
-             output_lang=None,
-         )
-
-     async def extract_keywords(
-         self,
-         text: str,
-         with_analysis: bool = False,
-         output_lang: str | None = None,
-         user_prompt: str | None = None,
-         temperature: float | None = 0.0,
-         logprobs: bool = False,
-         top_logprobs: int | None = None,
-     ) -> dict[str, list[str]]:
-         """
-         Extract salient keywords from text.
-
-         Returns:
-             ToolOutput: Object containing:
-                 - result (list[str]): List of extracted keywords
-                 - logprobs (list | None): Probability data if logprobs enabled
-                 - analysis (str | None): Detailed reasoning if with_analysis enabled
-         """
-         return await self.operator.run(
-             # User parameters
-             text=text,
-             with_analysis=with_analysis,
-             output_lang=output_lang,
-             user_prompt=user_prompt,
-             temperature=temperature,
-             logprobs=logprobs,
-             top_logprobs=top_logprobs,
-             # Internal parameters
-             prompt_file="extract_keywords.yaml",
-             output_model=OutputModels.ListStrOutput,
-             resp_format="parse",
-             mode=None,
-         )
-
-     async def extract_entities(
-         self,
-         text: str,
-         with_analysis: bool = False,
-         output_lang: str | None = None,
-         user_prompt: str | None = None,
-         temperature: float | None = 0.0,
-         logprobs: bool = False,
-         top_logprobs: int | None = None,
-     ) -> dict[str, list[dict[str, str]]]:
-         """
-         Perform Named Entity Recognition (NER) over the input text.
-
-         Returns:
-             ToolOutput: Object containing:
-                 - result (list[dict]): List of entities with 'text' and 'type' keys
-                 - logprobs (list | None): Probability data if logprobs enabled
-                 - analysis (str | None): Detailed reasoning if with_analysis enabled
-         """
-         return await self.operator.run(
-             # User parameters
-             text=text,
-             with_analysis=with_analysis,
-             output_lang=output_lang,
-             user_prompt=user_prompt,
-             temperature=temperature,
-             logprobs=logprobs,
-             top_logprobs=top_logprobs,
-             # Internal parameters
-             prompt_file="extract_entities.yaml",
-             output_model=OutputModels.ListDictStrStrOutput,
-             resp_format="parse",
-             mode=None,
-         )
-
-     async def is_question(
-         self,
-         text: str,
-         with_analysis: bool = False,
-         user_prompt: str | None = None,
-         temperature: float | None = 0.0,
-         logprobs: bool = False,
-         top_logprobs: int | None = None,
-     ) -> dict[str, bool]:
-         """
-         Detect if the input is phrased as a question.
-
-         Returns:
-             ToolOutput: Object containing:
-                 - result (bool): True if text is a question, False otherwise
-                 - logprobs (list | None): Probability data if logprobs enabled
-                 - analysis (str | None): Detailed reasoning if with_analysis enabled
-         """
-         return await self.operator.run(
-             # User parameters
-             text=text,
-             with_analysis=with_analysis,
-             user_prompt=user_prompt,
-             temperature=temperature,
-             logprobs=logprobs,
-             top_logprobs=top_logprobs,
-             # Internal parameters
-             prompt_file="is_question.yaml",
-             output_model=OutputModels.BoolOutput,
-             resp_format="parse",
-             mode=None,
-             output_lang=None,
-         )
-
-     async def text_to_question(
-         self,
-         text: str,
-         with_analysis: bool = False,
-         output_lang: str | None = None,
-         user_prompt: str | None = None,
-         temperature: float | None = 0.0,
-         logprobs: bool = False,
-         top_logprobs: int | None = None,
-     ) -> dict[str, str]:
-         """
-         Generate a single question from the given text.
-
-         Returns:
-             ToolOutput: Object containing:
-                 - result (str): The generated question
-                 - logprobs (list | None): Probability data if logprobs enabled
-                 - analysis (str | None): Detailed reasoning if with_analysis enabled
-         """
-         return await self.operator.run(
-             # User parameters
-             text=text,
-             with_analysis=with_analysis,
-             output_lang=output_lang,
-             user_prompt=user_prompt,
-             temperature=temperature,
-             logprobs=logprobs,
-             top_logprobs=top_logprobs,
-             # Internal parameters
-             prompt_file="text_to_question.yaml",
-             output_model=OutputModels.StrOutput,
-             resp_format="parse",
-             mode=None,
-         )
-
-     async def merge_questions(
-         self,
-         text: list[str],
-         with_analysis: bool = False,
-         output_lang: str | None = None,
-         user_prompt: str | None = None,
-         temperature: float | None = 0.0,
-         logprobs: bool = False,
-         top_logprobs: int | None = None,
-         mode: Literal["default", "reason"] = "default",
-     ) -> dict[str, str]:
-         """
-         Merge multiple questions into a single unified question.
-
-         Returns:
-             ToolOutput: Object containing:
-                 - result (str): The merged question
-                 - logprobs (list | None): Probability data if logprobs enabled
-                 - analysis (str | None): Detailed reasoning if with_analysis enabled
-         """
-         text = ", ".join(text)
-         return await self.operator.run(
-             # User parameters
-             text=text,
-             with_analysis=with_analysis,
-             output_lang=output_lang,
-             user_prompt=user_prompt,
-             temperature=temperature,
-             logprobs=logprobs,
-             top_logprobs=top_logprobs,
-             # Internal parameters
-             prompt_file="merge_questions.yaml",
-             output_model=OutputModels.StrOutput,
-             resp_format="parse",
-             mode=mode,
-         )
-
-     async def rewrite(
-         self,
-         text: str,
-         with_analysis: bool = False,
-         output_lang: str | None = None,
-         user_prompt: str | None = None,
-         temperature: float | None = 0.0,
-         logprobs: bool = False,
-         top_logprobs: int | None = None,
-         mode: Literal["positive", "negative", "hard_negative"] = "positive",
-     ) -> dict[str, str]:
-         """
-         Rewrite a text with different modes.
-
-         Returns:
-             ToolOutput: Object containing:
-                 - result (str): The rewritten text
-                 - logprobs (list | None): Probability data if logprobs enabled
-                 - analysis (str | None): Detailed reasoning if with_analysis enabled
-         """
-         return await self.operator.run(
-             # User parameters
-             text=text,
-             with_analysis=with_analysis,
-             output_lang=output_lang,
-             user_prompt=user_prompt,
-             temperature=temperature,
-             logprobs=logprobs,
-             top_logprobs=top_logprobs,
-             # Internal parameters
-             prompt_file="rewrite.yaml",
-             output_model=OutputModels.StrOutput,
-             resp_format="parse",
-             mode=mode,
-         )
-
-     async def subject_to_question(
-         self,
-         text: str,
-         number_of_questions: int,
-         with_analysis: bool = False,
-         output_lang: str | None = None,
-         user_prompt: str | None = None,
-         temperature: float | None = 0.0,
-         logprobs: bool = False,
-         top_logprobs: int | None = None,
-     ) -> dict[str, list[str]]:
-         """
-         Generate a list of questions about a subject.
-
-         Returns:
-             ToolOutput: Object containing:
-                 - result (list[str]): List of generated questions
-                 - logprobs (list | None): Probability data if logprobs enabled
-                 - analysis (str | None): Detailed reasoning if with_analysis enabled
-         """
-         return await self.operator.run(
-             # User parameters
-             text=text,
-             number_of_questions=number_of_questions,
-             with_analysis=with_analysis,
-             output_lang=output_lang,
-             user_prompt=user_prompt,
-             temperature=temperature,
-             logprobs=logprobs,
-             top_logprobs=top_logprobs,
-             # Internal parameters
-             prompt_file="subject_to_question.yaml",
-             output_model=OutputModels.ReasonListStrOutput,
-             resp_format="parse",
-             mode=None,
-         )
-
-     async def summarize(
-         self,
-         text: str,
-         with_analysis: bool = False,
-         output_lang: str | None = None,
-         user_prompt: str | None = None,
-         temperature: float | None = 0.0,
-         logprobs: bool = False,
-         top_logprobs: int | None = None,
-     ) -> dict[str, str]:
-         """
-         Summarize the given subject text.
-
-         Returns:
-             ToolOutput: Object containing:
-                 - result (str): The summary text
-                 - logprobs (list | None): Probability data if logprobs enabled
-                 - analysis (str | None): Detailed reasoning if with_analysis enabled
-         """
-         return await self.operator.run(
-             # User parameters
-             text=text,
-             with_analysis=with_analysis,
-             output_lang=output_lang,
-             user_prompt=user_prompt,
-             temperature=temperature,
-             logprobs=logprobs,
-             top_logprobs=top_logprobs,
-             # Internal parameters
-             prompt_file="summarize.yaml",
-             output_model=OutputModels.StrOutput,
-             resp_format="parse",
-             mode=None,
-         )
-
-     async def translate(
-         self,
-         text: str,
-         target_language: str,
-         with_analysis: bool = False,
-         user_prompt: str | None = None,
-         temperature: float | None = 0.0,
-         logprobs: bool = False,
-         top_logprobs: int | None = None,
-     ) -> dict[str, str]:
-         """
-         Translate text between languages.
-
-         Returns:
-             ToolOutput: Object containing:
-                 - result (str): The translated text
-                 - logprobs (list | None): Probability data if logprobs enabled
-                 - analysis (str | None): Detailed reasoning if with_analysis enabled
-         """
-         return await self.operator.run(
-             # User parameters
-             text=text,
-             target_language=target_language,
-             with_analysis=with_analysis,
-             user_prompt=user_prompt,
-             temperature=temperature,
-             logprobs=logprobs,
-             top_logprobs=top_logprobs,
-             # Internal parameters
-             prompt_file="translate.yaml",
-             output_model=OutputModels.StrOutput,
-             resp_format="parse",
-             mode=None,
-             output_lang=None,
-         )
-
-     async def run_custom(
-         self,
-         prompt: str,
-         output_model: Any,
-         output_lang: str | None = None,
-         temperature: float | None = None,
-         logprobs: bool | None = None,
-         top_logprobs: int | None = None,
-     ) -> dict[str, Any]:
-         """
-         Custom tool that can do almost anything!
-
-         Returns:
-             ToolOutput: Object with fields:
-                 - result (str): The output result
-         """
-         return await self.operator.run(
-             # User parameters
-             text=prompt,
-             output_model=output_model,
-             output_model_str=output_model.model_json_schema(),
-             output_lang=output_lang,
-             temperature=temperature,
-             logprobs=logprobs,
-             top_logprobs=top_logprobs,
-             # Internal parameters
-             prompt_file="run_custom.yaml",
-             resp_format="parse",
-             user_prompt=None,
-             with_analysis=False,
-             mode=None,
-         )
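
For context on what this deletion removes: per its own docstring, AsyncTheTool was the 1.1.x async entry point, with each method delegating to AsyncOperator.run(). A minimal sketch of how a 1.1.1 caller would have driven it (the base URL, API key, and model name below are placeholders, not values from the package):

import asyncio

from openai import AsyncOpenAI
from texttools.tools.async_the_tool import AsyncTheTool  # module removed in 1.2.0

async def main() -> None:
    client = AsyncOpenAI(base_url="http://localhost:8000/v1", api_key="EMPTY")
    tool = AsyncTheTool(client, model="model-name")
    # Despite the dict[str, str] annotation, run() actually returns a ToolOutput
    output = await tool.categorize("Some text ...", with_analysis=True)
    print(output.result, output.analysis)

asyncio.run(main())

In 1.2.0 this surface moves to texttools/tools/async_tools.py (see the file list above).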
@@ -1,179 +0,0 @@
- from typing import Any, TypeVar, Type, Literal
- import logging
-
- from openai import AsyncOpenAI
- from pydantic import BaseModel
-
- from texttools.tools.internals.output_models import ToolOutput
- from texttools.tools.internals.base_operator import BaseOperator
- from texttools.tools.internals.formatters import Formatter
- from texttools.tools.internals.prompt_loader import PromptLoader
-
- # Base Model type for output models
- T = TypeVar("T", bound=BaseModel)
-
- # Configure logger
- logger = logging.getLogger("async_operator")
- logger.setLevel(logging.INFO)
-
-
- class AsyncOperator(BaseOperator):
-     """
-     Core engine for running text-processing operations with an LLM (Async).
-
-     It wires together:
-     - `PromptLoader` → loads YAML prompt templates.
-     - `Formatter` → applies formatting to messages (e.g., merging consecutive user turns).
-     - AsyncOpenAI client → executes completions/parsed completions.
-     """
-
-     def __init__(self, client: AsyncOpenAI, model: str):
-         self.client = client
-         self.model = model
-
-     async def _analyze(self, prompt_configs: dict[str, str], temperature: float) -> str:
-         analyze_prompt = prompt_configs["analyze_template"]
-         analyze_message = [self._build_user_message(analyze_prompt)]
-         completion = await self.client.chat.completions.create(
-             model=self.model,
-             messages=analyze_message,
-             temperature=temperature,
-         )
-         analysis = completion.choices[0].message.content.strip()
-         return analysis
-
-     async def _parse_completion(
-         self,
-         message: list[dict[str, str]],
-         output_model: Type[T],
-         temperature: float,
-         logprobs: bool = False,
-         top_logprobs: int = 3,
-     ) -> tuple[Type[T], Any]:
-         request_kwargs = {
-             "model": self.model,
-             "messages": message,
-             "response_format": output_model,
-             "temperature": temperature,
-         }
-
-         if logprobs:
-             request_kwargs["logprobs"] = True
-             request_kwargs["top_logprobs"] = top_logprobs
-
-         completion = await self.client.beta.chat.completions.parse(**request_kwargs)
-         parsed = completion.choices[0].message.parsed
-         return parsed, completion
-
-     async def _vllm_completion(
-         self,
-         message: list[dict[str, str]],
-         output_model: Type[T],
-         temperature: float,
-         logprobs: bool = False,
-         top_logprobs: int = 3,
-     ) -> tuple[Type[T], Any]:
-         json_schema = output_model.model_json_schema()
-
-         # Build kwargs dynamically
-         request_kwargs = {
-             "model": self.model,
-             "messages": message,
-             "extra_body": {"guided_json": json_schema},
-             "temperature": temperature,
-         }
-
-         if logprobs:
-             request_kwargs["logprobs"] = True
-             request_kwargs["top_logprobs"] = top_logprobs
-
-         completion = await self.client.chat.completions.create(**request_kwargs)
-         response = completion.choices[0].message.content
-
-         # Convert the string response to output model
-         parsed = self._convert_to_output_model(response, output_model)
-         return parsed, completion
-
-     async def run(
-         self,
-         # User parameters
-         text: str,
-         with_analysis: bool,
-         output_lang: str | None,
-         user_prompt: str | None,
-         temperature: float,
-         logprobs: bool,
-         top_logprobs: int | None,
-         # Internal parameters
-         prompt_file: str,
-         output_model: Type[T],
-         resp_format: Literal["vllm", "parse"],
-         mode: str | None,
-         **extra_kwargs,
-     ) -> dict[str, Any]:
-         """
-         Execute the async LLM pipeline with the given input text.
-         """
-         prompt_loader = PromptLoader()
-         formatter = Formatter()
-
-         try:
-             prompt_configs = prompt_loader.load(
-                 prompt_file=prompt_file,
-                 text=text.strip(),
-                 mode=mode,
-                 **extra_kwargs,
-             )
-
-             messages: list[dict[str, str]] = []
-
-             if with_analysis:
-                 analysis = await self._analyze(prompt_configs, temperature)
-                 messages.append(
-                     self._build_user_message(f"Based on this analysis: {analysis}")
-                 )
-
-             if output_lang:
-                 messages.append(
-                     self._build_user_message(
-                         f"Respond only in the {output_lang} language."
-                     )
-                 )
-
-             if user_prompt:
-                 messages.append(
-                     self._build_user_message(f"Consider this instruction {user_prompt}")
-                 )
-
-             messages.append(self._build_user_message(prompt_configs["main_template"]))
-             messages = formatter.user_merge_format(messages)
-
-             if resp_format == "vllm":
-                 parsed, completion = await self._vllm_completion(
-                     messages, output_model, temperature, logprobs, top_logprobs
-                 )
-             elif resp_format == "parse":
-                 parsed, completion = await self._parse_completion(
-                     messages, output_model, temperature, logprobs, top_logprobs
-                 )
-
-             # Ensure output_model has a `result` field
-             if not hasattr(parsed, "result"):
-                 logger.error(
-                     "The provided output_model must define a field named 'result'"
-                 )
-
-             output = ToolOutput(result="", analysis="", logprobs=[], errors=[])
-
-             output.result = parsed.result
-
-             if logprobs:
-                 output.logprobs = self._extract_logprobs(completion)
-
-             if with_analysis:
-                 output.analysis = analysis
-
-             return output
-         except Exception as e:
-             logger.error(f"AsyncTheTool failed: {e}")
-             return ToolOutput(result="", analysis="", logprobs=[], errors=[str(e)])
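
Of run()'s two response paths, "parse" uses the OpenAI SDK's beta parsed-completions helper, while "vllm" passes the Pydantic JSON schema through extra_body as vLLM's guided_json constraint and re-parses the raw text itself. A standalone sketch of the "vllm" request shape (endpoint, model name, and prompt are placeholders, and the output model is a one-line stand-in for the package's):

import asyncio

from openai import AsyncOpenAI
from pydantic import BaseModel

class StrOutput(BaseModel):  # stand-in for texttools' output model
    result: str

async def main() -> None:
    client = AsyncOpenAI(base_url="http://localhost:8000/v1", api_key="EMPTY")
    completion = await client.chat.completions.create(
        model="model-name",
        messages=[{"role": "user", "content": "Summarize: ..."}],
        # guided_json is a vLLM server extension; other backends ignore extra_body keys
        extra_body={"guided_json": StrOutput.model_json_schema()},
        temperature=0.0,
    )
    print(StrOutput.model_validate_json(completion.choices[0].message.content))

asyncio.run(main())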
@@ -1,91 +0,0 @@
- from typing import TypeVar, Type, Any
- import json
- import re
- import math
- import logging
-
- from pydantic import BaseModel
- from openai import OpenAI, AsyncOpenAI
-
- # Base Model type for output models
- T = TypeVar("T", bound=BaseModel)
-
- # Configure logger
- logger = logging.getLogger("base_operator")
- logger.setLevel(logging.INFO)
-
-
- class BaseOperator:
-     def __init__(self, client: OpenAI | AsyncOpenAI, model: str):
-         self.client = client
-         self.model = model
-
-     def _build_user_message(self, prompt: str) -> dict[str, str]:
-         return {"role": "user", "content": prompt}
-
-     def _clean_json_response(self, response: str) -> str:
-         """
-         Clean JSON response by removing code block markers and whitespace.
-         Handles cases like:
-         - ```json{"result": "value"}```
-         """
-         stripped = response.strip()
-         cleaned = re.sub(r"^```(?:json)?\s*", "", stripped)
-         cleaned = re.sub(r"\s*```$", "", cleaned)
-
-         return cleaned.strip()
-
-     def _convert_to_output_model(
-         self, response_string: str, output_model: Type[T]
-     ) -> Type[T]:
-         """
-         Convert a JSON response string to output model.
-
-         Args:
-             response_string: The JSON string (may contain code block markers)
-             output_model: Your Pydantic output model class (e.g., StrOutput, ListStrOutput)
-
-         Returns:
-             Instance of your output model
-         """
-         # Clean the response string
-         cleaned_json = self._clean_json_response(response_string)
-
-         # Fix Python-style booleans
-         cleaned_json = cleaned_json.replace("False", "false").replace("True", "true")
-
-         # Convert string to Python dictionary
-         response_dict = json.loads(cleaned_json)
-
-         # Convert dictionary to output model
-         return output_model(**response_dict)
-
-     def _extract_logprobs(self, completion: dict) -> list[dict[str, Any]]:
-         logprobs_data = []
-         ignore_pattern = re.compile(r'^(result|[\s\[\]\{\}",:]+)$')
-
-         for choice in completion.choices:
-             if not getattr(choice, "logprobs", None):
-                 logger.error("logprobs is not available in the chosen model.")
-                 return []
-
-             for logprob_item in choice.logprobs.content:
-                 if ignore_pattern.match(logprob_item.token):
-                     continue
-                 token_entry = {
-                     "token": logprob_item.token,
-                     "prob": round(math.exp(logprob_item.logprob), 8),
-                     "top_alternatives": [],
-                 }
-                 for alt in logprob_item.top_logprobs:
-                     if ignore_pattern.match(alt.token):
-                         continue
-                     token_entry["top_alternatives"].append(
-                         {
-                             "token": alt.token,
-                             "prob": round(math.exp(alt.logprob), 8),
-                         }
-                     )
-                 logprobs_data.append(token_entry)
-
-         return logprobs_data
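
The JSON-repair helpers above are easy to exercise in isolation; a quick check of the fence stripping and boolean fix-up, with a one-line stand-in for the package's output models:

import json
import re

from pydantic import BaseModel

class BoolOutput(BaseModel):  # stand-in for texttools' output model
    result: bool

raw = '```json\n{"result": True}\n```'  # model emitted a Python-style literal
cleaned = re.sub(r"^```(?:json)?\s*", "", raw.strip())
cleaned = re.sub(r"\s*```$", "", cleaned).strip()
cleaned = cleaned.replace("False", "false").replace("True", "true")
print(BoolOutput(**json.loads(cleaned)))  # result=True

Note that the blanket replace() is why this style of True/False fixing is risky: it would also rewrite those words inside string values.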
@@ -1,24 +0,0 @@
- class Formatter:
-     @staticmethod
-     def user_merge_format(messages: list[dict[str, str]]) -> list[dict[str, str]]:
-         """
-         Merges consecutive user messages into a single message, separated by newlines.
-
-         This is useful for condensing a multi-turn user input into a single
-         message for the LLM. Assistant and system messages are left unchanged and
-         act as separators between user message groups.
-         """
-         merged: list[dict[str, str]] = []
-
-         for message in messages:
-             role, content = message["role"], message["content"].strip()
-
-             # Merge with previous user turn
-             if merged and role == "user" and merged[-1]["role"] == "user":
-                 merged[-1]["content"] += "\n" + content
-
-             # Otherwise, start a new turn
-             else:
-                 merged.append({"role": role, "content": content})
-
-         return merged
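
The merge behavior of this removed helper is easiest to see on the kind of list AsyncOperator.run() builds, where every entry is a user turn:

from texttools.tools.internals.formatters import Formatter  # module removed in 1.2.0

messages = [
    {"role": "user", "content": "Based on this analysis: ..."},
    {"role": "user", "content": "Respond only in the English language."},
    {"role": "user", "content": "Is the following text a question? ..."},
]
print(Formatter.user_merge_format(messages))
# -> a single user message with the three contents joined by newlines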