hamtaa-texttools 1.0.7__tar.gz → 1.0.9__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.


Files changed (38)
  1. {hamtaa_texttools-1.0.7/hamtaa_texttools.egg-info → hamtaa_texttools-1.0.9}/PKG-INFO +14 -23
  2. {hamtaa_texttools-1.0.7 → hamtaa_texttools-1.0.9}/README.md +12 -21
  3. {hamtaa_texttools-1.0.7 → hamtaa_texttools-1.0.9/hamtaa_texttools.egg-info}/PKG-INFO +14 -23
  4. {hamtaa_texttools-1.0.7 → hamtaa_texttools-1.0.9}/hamtaa_texttools.egg-info/SOURCES.txt +1 -2
  5. {hamtaa_texttools-1.0.7 → hamtaa_texttools-1.0.9}/pyproject.toml +2 -2
  6. {hamtaa_texttools-1.0.7 → hamtaa_texttools-1.0.9}/texttools/tools/async_the_tool.py +60 -15
  7. {hamtaa_texttools-1.0.7 → hamtaa_texttools-1.0.9}/texttools/tools/internals/async_operator.py +9 -23
  8. {hamtaa_texttools-1.0.7 → hamtaa_texttools-1.0.9}/texttools/tools/internals/base_operator.py +9 -3
  9. hamtaa_texttools-1.0.9/texttools/tools/internals/formatters.py +24 -0
  10. {hamtaa_texttools-1.0.7 → hamtaa_texttools-1.0.9}/texttools/tools/internals/operator.py +10 -25
  11. {hamtaa_texttools-1.0.7 → hamtaa_texttools-1.0.9}/texttools/tools/internals/prompt_loader.py +3 -12
  12. {hamtaa_texttools-1.0.7 → hamtaa_texttools-1.0.9}/texttools/tools/the_tool.py +13 -70
  13. hamtaa_texttools-1.0.7/texttools/formatters/base_formatter.py +0 -33
  14. hamtaa_texttools-1.0.7/texttools/formatters/user_merge_formatter.py +0 -30
  15. {hamtaa_texttools-1.0.7 → hamtaa_texttools-1.0.9}/LICENSE +0 -0
  16. {hamtaa_texttools-1.0.7 → hamtaa_texttools-1.0.9}/MANIFEST.in +0 -0
  17. {hamtaa_texttools-1.0.7 → hamtaa_texttools-1.0.9}/hamtaa_texttools.egg-info/dependency_links.txt +0 -0
  18. {hamtaa_texttools-1.0.7 → hamtaa_texttools-1.0.9}/hamtaa_texttools.egg-info/requires.txt +0 -0
  19. {hamtaa_texttools-1.0.7 → hamtaa_texttools-1.0.9}/hamtaa_texttools.egg-info/top_level.txt +0 -0
  20. {hamtaa_texttools-1.0.7 → hamtaa_texttools-1.0.9}/setup.cfg +0 -0
  21. {hamtaa_texttools-1.0.7 → hamtaa_texttools-1.0.9}/texttools/__init__.py +0 -0
  22. {hamtaa_texttools-1.0.7 → hamtaa_texttools-1.0.9}/texttools/batch/__init__.py +0 -0
  23. {hamtaa_texttools-1.0.7 → hamtaa_texttools-1.0.9}/texttools/batch/batch_manager.py +0 -0
  24. {hamtaa_texttools-1.0.7 → hamtaa_texttools-1.0.9}/texttools/batch/batch_runner.py +0 -0
  25. {hamtaa_texttools-1.0.7 → hamtaa_texttools-1.0.9}/texttools/prompts/README.md +0 -0
  26. {hamtaa_texttools-1.0.7 → hamtaa_texttools-1.0.9}/texttools/prompts/categorizer.yaml +0 -0
  27. {hamtaa_texttools-1.0.7 → hamtaa_texttools-1.0.9}/texttools/prompts/extract_entities.yaml +0 -0
  28. {hamtaa_texttools-1.0.7 → hamtaa_texttools-1.0.9}/texttools/prompts/extract_keywords.yaml +0 -0
  29. {hamtaa_texttools-1.0.7 → hamtaa_texttools-1.0.9}/texttools/prompts/is_question.yaml +0 -0
  30. {hamtaa_texttools-1.0.7 → hamtaa_texttools-1.0.9}/texttools/prompts/merge_questions.yaml +0 -0
  31. {hamtaa_texttools-1.0.7 → hamtaa_texttools-1.0.9}/texttools/prompts/rewrite.yaml +0 -0
  32. {hamtaa_texttools-1.0.7 → hamtaa_texttools-1.0.9}/texttools/prompts/run_custom.yaml +0 -0
  33. {hamtaa_texttools-1.0.7 → hamtaa_texttools-1.0.9}/texttools/prompts/subject_to_question.yaml +0 -0
  34. {hamtaa_texttools-1.0.7 → hamtaa_texttools-1.0.9}/texttools/prompts/summarize.yaml +0 -0
  35. {hamtaa_texttools-1.0.7 → hamtaa_texttools-1.0.9}/texttools/prompts/text_to_question.yaml +0 -0
  36. {hamtaa_texttools-1.0.7 → hamtaa_texttools-1.0.9}/texttools/prompts/translate.yaml +0 -0
  37. {hamtaa_texttools-1.0.7 → hamtaa_texttools-1.0.9}/texttools/tools/__init__.py +0 -0
  38. {hamtaa_texttools-1.0.7 → hamtaa_texttools-1.0.9}/texttools/tools/internals/output_models.py +0 -0
PKG-INFO
@@ -1,7 +1,7 @@
  Metadata-Version: 2.4
  Name: hamtaa-texttools
- Version: 1.0.7
- Summary: TextTools is a high-level NLP toolkit built on top of modern LLMs.
+ Version: 1.0.9
+ Summary: A high-level NLP toolkit built on top of modern LLMs.
  Author-email: Tohidi <the.mohammad.tohidi@gmail.com>, Montazer <montazerh82@gmail.com>, Givechi <mohamad.m.givechi@gmail.com>, MoosaviNejad <erfanmoosavi84@gmail.com>
  License: MIT License

@@ -42,8 +42,6 @@ It provides both **sync (`TheTool`)** and **async (`AsyncTheTool`)** APIs for ma

  It provides ready-to-use utilities for **translation, question detection, keyword extraction, categorization, NER extraction, and more** — designed to help you integrate AI-powered text processing into your applications with minimal effort.

- **Thread Safety:** All methods in AsyncTheTool are thread-safe, allowing concurrent usage across multiple threads without conflicts.
-
  ---

  ## ✨ Features
@@ -78,7 +76,11 @@ Note: This doubles token usage per call because it triggers an additional LLM re

  - **`user_prompt="..."`** → Allows you to inject a custom instruction or prompt into the model alongside the main template. This gives you fine-grained control over how the model interprets or modifies the input text.

- All these flags can be used individually or together to tailor the behavior of any tool in **TextTools**.
+ - **`temperature=0.0`** → Determines how creatively the model responds. Takes a float from `0.0` to `1.0`.
+
+ All these parameters can be used individually or together to tailor the behavior of any tool in **TextTools**.
+
+ **Note:** Some tools may not support all of the parameters above.

  ---

@@ -104,7 +106,6 @@ pip install -U hamtaa-texttools

  ```python
  from openai import OpenAI
- from pydantic import BaseModel
  from texttools import TheTool

  # Create your OpenAI client
@@ -114,29 +115,19 @@ client = OpenAI(base_url = "your_url", API_KEY = "your_api_key")
  model = "gpt-4o-mini"

  # Create an instance of TheTool
- # Note: You can give parameters to TheTool so that you don't need to give them to each tool
- the_tool = TheTool(client=client, model=model, with_analysis=True, output_lang="English")
+ the_tool = TheTool(client=client, model=model)

  # Example: Question Detection
  detection = the_tool.is_question("Is this project open source?", logprobs=True, top_logprobs=2)
  print(detection["result"])
  print(detection["logprobs"])
- # Output: True
+ # Output: True, followed by the logprobs

  # Example: Translation
- # Note: You can overwrite with_analysis if defined at TheTool
- print(the_tool.translate("سلام، حالت چطوره؟", target_language="English", with_analysis=False)["result"])
- # Output: "Hi! How are you?"
-
- # Example: Custom Tool
- # Note: Output model should only contain result key
- # Everything else will be ignored
- class Custom(BaseModel):
-     result: list[list[dict[str, int]]]
-
- custom_prompt = "Something"
- custom_result = the_tool.run_custom(custom_prompt, Custom)
- print(custom_result)
+ translation = the_tool.translate("سلام، حالت چطوره؟", target_language="English", with_analysis=True)
+ print(translation["result"])
+ print(translation["analysis"])
+ # Output: "Hi! How are you?", followed by the analysis
  ```

  ---
@@ -149,7 +140,7 @@ from openai import AsyncOpenAI
  from texttools import AsyncTheTool

  async def main():
-     # Create your async OpenAI client
+     # Create your AsyncOpenAI client
      async_client = AsyncOpenAI(base_url="your_url", api_key="your_api_key")

      # Specify the model

README.md
@@ -8,8 +8,6 @@ It provides both **sync (`TheTool`)** and **async (`AsyncTheTool`)** APIs for ma

  It provides ready-to-use utilities for **translation, question detection, keyword extraction, categorization, NER extraction, and more** — designed to help you integrate AI-powered text processing into your applications with minimal effort.

- **Thread Safety:** All methods in AsyncTheTool are thread-safe, allowing concurrent usage across multiple threads without conflicts.
-
  ---

  ## ✨ Features
@@ -44,7 +42,11 @@ Note: This doubles token usage per call because it triggers an additional LLM re

  - **`user_prompt="..."`** → Allows you to inject a custom instruction or prompt into the model alongside the main template. This gives you fine-grained control over how the model interprets or modifies the input text.

- All these flags can be used individually or together to tailor the behavior of any tool in **TextTools**.
+ - **`temperature=0.0`** → Determines how creatively the model responds. Takes a float from `0.0` to `1.0`.
+
+ All these parameters can be used individually or together to tailor the behavior of any tool in **TextTools**.
+
+ **Note:** Some tools may not support all of the parameters above.

  ---

@@ -70,7 +72,6 @@ pip install -U hamtaa-texttools

  ```python
  from openai import OpenAI
- from pydantic import BaseModel
  from texttools import TheTool

  # Create your OpenAI client
@@ -80,29 +81,19 @@ client = OpenAI(base_url = "your_url", API_KEY = "your_api_key")
  model = "gpt-4o-mini"

  # Create an instance of TheTool
- # Note: You can give parameters to TheTool so that you don't need to give them to each tool
- the_tool = TheTool(client=client, model=model, with_analysis=True, output_lang="English")
+ the_tool = TheTool(client=client, model=model)

  # Example: Question Detection
  detection = the_tool.is_question("Is this project open source?", logprobs=True, top_logprobs=2)
  print(detection["result"])
  print(detection["logprobs"])
- # Output: True
+ # Output: True, followed by the logprobs

  # Example: Translation
- # Note: You can overwrite with_analysis if defined at TheTool
- print(the_tool.translate("سلام، حالت چطوره؟", target_language="English", with_analysis=False)["result"])
- # Output: "Hi! How are you?"
-
- # Example: Custom Tool
- # Note: Output model should only contain result key
- # Everything else will be ignored
- class Custom(BaseModel):
-     result: list[list[dict[str, int]]]
-
- custom_prompt = "Something"
- custom_result = the_tool.run_custom(custom_prompt, Custom)
- print(custom_result)
+ translation = the_tool.translate("سلام، حالت چطوره؟", target_language="English", with_analysis=True)
+ print(translation["result"])
+ print(translation["analysis"])
+ # Output: "Hi! How are you?", followed by the analysis
  ```

  ---
@@ -115,7 +106,7 @@ from openai import AsyncOpenAI
  from texttools import AsyncTheTool

  async def main():
-     # Create your async OpenAI client
+     # Create your AsyncOpenAI client
      async_client = AsyncOpenAI(base_url="your_url", api_key="your_api_key")

      # Specify the model

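Putting the documented parameters together, here is a minimal sketch of one call with every flag enabled. The endpoint, key, and model are placeholders; `extract_keywords` is used for illustration, and per the note above not every tool accepts every parameter:

```python
from openai import OpenAI
from texttools import TheTool

client = OpenAI(base_url="your_url", api_key="your_api_key")  # placeholders
the_tool = TheTool(client=client, model="gpt-4o-mini")

keywords = the_tool.extract_keywords(
    "TextTools is a high-level NLP toolkit built on top of modern LLMs.",
    with_analysis=True,                          # extra reasoning pass; doubles token usage
    user_prompt="Prefer single-word keywords.",  # custom instruction alongside the template
    temperature=0.2,                             # 0.0-1.0, higher is more creative
    logprobs=True,
    top_logprobs=2,
)
print(keywords["result"])        # ["keyword1", ...]
print(keywords.get("analysis"))  # present because with_analysis=True
print(keywords.get("logprobs"))  # present because logprobs=True
```
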
hamtaa_texttools.egg-info/PKG-INFO
(Identical diff to PKG-INFO above; the egg-info copy changed in the same way.)

hamtaa_texttools.egg-info/SOURCES.txt
@@ -11,8 +11,6 @@ texttools/__init__.py
  texttools/batch/__init__.py
  texttools/batch/batch_manager.py
  texttools/batch/batch_runner.py
- texttools/formatters/base_formatter.py
- texttools/formatters/user_merge_formatter.py
  texttools/prompts/README.md
  texttools/prompts/categorizer.yaml
  texttools/prompts/extract_entities.yaml
@@ -30,6 +28,7 @@ texttools/tools/async_the_tool.py
  texttools/tools/the_tool.py
  texttools/tools/internals/async_operator.py
  texttools/tools/internals/base_operator.py
+ texttools/tools/internals/formatters.py
  texttools/tools/internals/operator.py
  texttools/tools/internals/output_models.py
  texttools/tools/internals/prompt_loader.py

pyproject.toml
@@ -4,14 +4,14 @@ build-backend = "setuptools.build_meta"

  [project]
  name = "hamtaa-texttools"
- version = "1.0.7"
+ version = "1.0.9"
  authors = [
      { name = "Tohidi", email = "the.mohammad.tohidi@gmail.com" },
      { name = "Montazer", email = "montazerh82@gmail.com" },
      { name = "Givechi", email = "mohamad.m.givechi@gmail.com" },
      { name = "MoosaviNejad", email = "erfanmoosavi84@gmail.com" },
  ]
- description = "TextTools is a high-level NLP toolkit built on top of modern LLMs."
+ description = "A high-level NLP toolkit built on top of modern LLMs."
  readme = "README.md"
  license = {file = "LICENSE"}
  requires-python = ">=3.8"

texttools/tools/async_the_tool.py
@@ -10,6 +10,9 @@ class AsyncTheTool:
      """
      Async counterpart to TheTool.

+     Each method configures the async operator with a specific YAML prompt,
+     output schema, and flags, then delegates execution to `operator.run()`.
+
      Usage:
          async_client = AsyncOpenAI(...)
          tool = TheToolAsync(async_client, model="model-name")
@@ -27,7 +30,6 @@
          self,
          text: str,
          with_analysis: bool = False,
-         output_lang: str | None = None,
          user_prompt: str | None = None,
          temperature: float | None = 0.0,
          logprobs: bool = False,
@@ -36,20 +38,13 @@
          """
          Categorize a text into a single Islamic studies domain category.

-         Args:
-             text: Input string to categorize.
-             with_analysis: If True, first runs an LLM "analysis" step and
-                 conditions the main prompt on that analysis.
-
          Returns:
-             {"result": <category string>}
-             Example: {"result": "باورهای دینی"}
+             {"result": <category string>} + ("logprobs" and "analysis" if enabled)
          """
          return await self.operator.run(
              # User parameters
              text=text,
              with_analysis=with_analysis,
-             output_lang=output_lang,
              user_prompt=user_prompt,
              temperature=temperature,
              logprobs=logprobs,
@@ -59,6 +54,7 @@
              output_model=OutputModels.CategorizerOutput,
              resp_format="parse",
              mode=None,
+             output_lang=None,
          )

      async def extract_keywords(
@@ -71,6 +67,12 @@
          logprobs: bool = False,
          top_logprobs: int | None = None,
      ) -> dict[str, list[str]]:
+         """
+         Extract salient keywords from text.
+
+         Returns:
+             {"result": [<keyword1>, <keyword2>, ...]} + ("logprobs" and "analysis" if enabled)
+         """
          return await self.operator.run(
              # User parameters
              text=text,
@@ -97,6 +99,12 @@
          logprobs: bool = False,
          top_logprobs: int | None = None,
      ) -> dict[str, list[dict[str, str]]]:
+         """
+         Perform Named Entity Recognition (NER) over the input text.
+
+         Returns:
+             {"result": [{"text": <entity>, "type": <entity_type>}, ...]} + ("logprobs" and "analysis" if enabled)
+         """
          return await self.operator.run(
              # User parameters
              text=text,
@@ -122,6 +130,12 @@
          logprobs: bool = False,
          top_logprobs: int | None = None,
      ) -> dict[str, bool]:
+         """
+         Detect if the input is phrased as a question.
+
+         Returns:
+             {"result": True} or {"result": False} + ("logprobs" and "analysis" if enabled)
+         """
          return await self.operator.run(
              # User parameters
              text=text,
@@ -148,6 +162,12 @@
          logprobs: bool = False,
          top_logprobs: int | None = None,
      ) -> dict[str, str]:
+         """
+         Generate a single question from the given text.
+
+         Returns:
+             {"result": <generated_question>} + ("logprobs" and "analysis" if enabled)
+         """
          return await self.operator.run(
              # User parameters
              text=text,
@@ -175,6 +195,12 @@
          top_logprobs: int | None = None,
          mode: Literal["default", "reason"] = "default",
      ) -> dict[str, str]:
+         """
+         Merge multiple questions into a single unified question.
+
+         Returns:
+             {"result": <merged_question>} + ("logprobs" and "analysis" if enabled)
+         """
          text = ", ".join(text)
          return await self.operator.run(
              # User parameters
@@ -203,6 +229,12 @@
          top_logprobs: int | None = None,
          mode: Literal["positive", "negative", "hard_negative"] = "positive",
      ) -> dict[str, str]:
+         """
+         Rewrite a text with different modes.
+
+         Returns:
+             {"result": <rewritten_text>} + ("logprobs" and "analysis" if enabled)
+         """
          return await self.operator.run(
              # User parameters
              text=text,
@@ -230,6 +262,12 @@
          logprobs: bool = False,
          top_logprobs: int | None = None,
      ) -> dict[str, list[str]]:
+         """
+         Generate a list of questions about a subject.
+
+         Returns:
+             {"result": [<question1>, <question2>, ...]} + ("logprobs" and "analysis" if enabled)
+         """
          return await self.operator.run(
              # User parameters
              text=text,
@@ -257,6 +295,12 @@
          logprobs: bool = False,
          top_logprobs: int | None = None,
      ) -> dict[str, str]:
+         """
+         Summarize the given subject text.
+
+         Returns:
+             {"result": <summary>} + ("logprobs" and "analysis" if enabled)
+         """
          return await self.operator.run(
              # User parameters
              text=text,
@@ -278,18 +322,22 @@
          text: str,
          target_language: str,
          with_analysis: bool = False,
-         output_lang: str | None = None,
          user_prompt: str | None = None,
          temperature: float | None = 0.0,
          logprobs: bool = False,
          top_logprobs: int | None = None,
      ) -> dict[str, str]:
+         """
+         Translate text between languages.
+
+         Returns:
+             {"result": <translated_text>} + ("logprobs" and "analysis" if enabled)
+         """
          return await self.operator.run(
              # User parameters
              text=text,
              target_language=target_language,
              with_analysis=with_analysis,
-             output_lang=output_lang,
              user_prompt=user_prompt,
              temperature=temperature,
              logprobs=logprobs,
@@ -299,6 +347,7 @@
              output_model=OutputModels.StrOutput,
              resp_format="parse",
              mode=None,
+             output_lang=None,
          )

      async def run_custom(
@@ -313,10 +362,6 @@
          """
          Custom tool that can do almost anything!

-         Args:
-             prompt: Custom prompt.
-             output_model: Custom BaseModel output model.
-
          Returns:
              {"result": <Any>}
          """

texttools/tools/internals/async_operator.py
@@ -1,5 +1,3 @@
- from __future__ import annotations
-
  from typing import Any, TypeVar, Type, Literal
  import logging

@@ -7,9 +5,7 @@ from openai import AsyncOpenAI
  from pydantic import BaseModel

  from texttools.tools.internals.base_operator import BaseOperator
- from texttools.formatters.user_merge_formatter import (
-     UserMergeFormatter,
- )
+ from texttools.tools.internals.formatters import Formatter
  from texttools.tools.internals.prompt_loader import PromptLoader

  # Base Model type for output models
@@ -31,14 +27,12 @@ class AsyncOperator(BaseOperator):
      """

      def __init__(self, client: AsyncOpenAI, model: str):
-         self.client: AsyncOpenAI = client
+         self.client = client
          self.model = model

-     async def _analysis_completion(
-         self,
-         analyze_message: list[dict[str, str]],
-         temperature: float,
-     ) -> str:
+     async def _analyze(self, prompt_configs: dict[str, str], temperature: float) -> str:
+         analyze_prompt = prompt_configs["analyze_template"]
+         analyze_message = [self._build_user_message(analyze_prompt)]
          completion = await self.client.chat.completions.create(
              model=self.model,
              messages=analyze_message,
@@ -47,12 +41,6 @@
          analysis = completion.choices[0].message.content.strip()
          return analysis

-     async def _analyze(self, prompt_configs: dict[str, str], temperature: float) -> str:
-         analyze_prompt = prompt_configs["analyze_template"]
-         analyze_message = [self._build_user_message(analyze_prompt)]
-         analysis = await self._analysis_completion(analyze_message, temperature)
-         return analysis
-
      async def _parse_completion(
          self,
          message: list[dict[str, str]],
@@ -126,14 +114,12 @@
          Execute the async LLM pipeline with the given input text. (Async)
          """
          prompt_loader = PromptLoader()
-         formatter = UserMergeFormatter()
+         formatter = Formatter()

          try:
-             cleaned_text = text.strip()
-
              prompt_configs = prompt_loader.load(
                  prompt_file=prompt_file,
-                 text=cleaned_text,
+                 text=text.strip(),
                  mode=mode,
                  **extra_kwargs,
              )
@@ -159,7 +145,7 @@
              )

              messages.append(self._build_user_message(prompt_configs["main_template"]))
-             messages = formatter.format(messages)
+             messages = formatter.user_merge_format(messages)

              if resp_format == "vllm":
                  parsed, completion = await self._vllm_completion(
@@ -188,4 +174,4 @@

          except Exception as e:
              logger.error(f"Async TheTool failed: {e}")
-             return {"Error": str(e), "result": ""}
+             return {"error": str(e), "result": ""}

texttools/tools/internals/base_operator.py
@@ -1,7 +1,8 @@
- from typing import TypeVar, Type
+ from typing import TypeVar, Type, Any
  import json
  import re
  import math
+ import logging

  from pydantic import BaseModel
  from openai import OpenAI, AsyncOpenAI
@@ -9,6 +10,10 @@ from openai import OpenAI, AsyncOpenAI
  # Base Model type for output models
  T = TypeVar("T", bound=BaseModel)

+ # Configure logger
+ logger = logging.getLogger("base_operator")
+ logger.setLevel(logging.INFO)
+

  class BaseOperator:
      def __init__(self, client: OpenAI | AsyncOpenAI, model: str):
@@ -55,13 +60,14 @@
          # Convert dictionary to output model
          return output_model(**response_dict)

-     def _extract_logprobs(self, completion: dict):
+     def _extract_logprobs(self, completion: dict) -> list[dict[str, Any]]:
          logprobs_data = []
          ignore_pattern = re.compile(r'^(result|[\s\[\]\{\}",:]+)$')

          for choice in completion.choices:
              if not getattr(choice, "logprobs", None):
-                 continue
+                 logger.error("logprobs is not available in the chosen model.")
+                 return []

              for logprob_item in choice.logprobs.content:
                  if ignore_pattern.match(logprob_item.token):

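To see what `_extract_logprobs` keeps, here is the ignore pattern from the hunk above applied to a hypothetical token stream for the JSON payload `{"result": true}`:

```python
import re

# Pattern copied from _extract_logprobs: skips the literal "result" key
# and any token made purely of JSON punctuation/whitespace
ignore_pattern = re.compile(r'^(result|[\s\[\]\{\}",:]+)$')

tokens = ['{"', 'result', '":', ' true', '}']  # hypothetical tokenization
kept = [t for t in tokens if not ignore_pattern.match(t)]
print(kept)  # [' true'] -- only content tokens contribute logprob entries
```
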
texttools/tools/internals/formatters.py (new)
@@ -0,0 +1,24 @@
+ class Formatter:
+     @staticmethod
+     def user_merge_format(messages: list[dict[str, str]]) -> list[dict[str, str]]:
+         """
+         Merges consecutive user messages into a single message, separated by newlines.
+
+         This is useful for condensing a multi-turn user input into a single
+         message for the LLM. Assistant and system messages are left unchanged and
+         act as separators between user message groups.
+         """
+         merged: list[dict[str, str]] = []
+
+         for message in messages:
+             role, content = message["role"], message["content"].strip()
+
+             # Merge with previous user turn
+             if merged and role == "user" and merged[-1]["role"] == "user":
+                 merged[-1]["content"] += "\n" + content
+
+             # Otherwise, start a new turn
+             else:
+                 merged.append({"role": role, "content": content})
+
+         return merged

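Since `user_merge_format` is a staticmethod with no dependencies, its behavior is easy to demonstrate; a small sketch:

```python
from texttools.tools.internals.formatters import Formatter

messages = [
    {"role": "user", "content": "Here is the analysis of the text."},
    {"role": "user", "content": "Now translate the text to English."},
    {"role": "assistant", "content": "Sure."},
]

# Consecutive user turns collapse into one message joined by "\n"
print(Formatter.user_merge_format(messages))
# [{'role': 'user', 'content': 'Here is the analysis of the text.\nNow translate the text to English.'},
#  {'role': 'assistant', 'content': 'Sure.'}]
```
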
texttools/tools/internals/operator.py
@@ -1,5 +1,3 @@
- from __future__ import annotations
-
  from typing import Any, TypeVar, Type, Literal
  import logging

@@ -7,9 +5,7 @@ from openai import OpenAI
  from pydantic import BaseModel

  from texttools.tools.internals.base_operator import BaseOperator
- from texttools.formatters.user_merge_formatter import (
-     UserMergeFormatter,
- )
+ from texttools.tools.internals.formatters import Formatter
  from texttools.tools.internals.prompt_loader import PromptLoader

  # Base Model type for output models
@@ -22,7 +18,7 @@ logger.setLevel(logging.INFO)

  class Operator(BaseOperator):
      """
-     Core engine for running text-processing operations with an LLM.
+     Core engine for running text-processing operations with an LLM (Sync).

      It wires together:
      - `PromptLoader` → loads YAML prompt templates.
@@ -31,14 +27,12 @@
      """

      def __init__(self, client: OpenAI, model: str):
-         self.client: OpenAI = client
+         self.client = client
          self.model = model

-     def _analysis_completion(
-         self,
-         analyze_message: list[dict[str, str]],
-         temperature: float,
-     ) -> str:
+     def _analyze(self, prompt_configs: dict[str, str], temperature: float) -> str:
+         analyze_prompt = prompt_configs["analyze_template"]
+         analyze_message = [self._build_user_message(analyze_prompt)]
          completion = self.client.chat.completions.create(
              model=self.model,
              messages=analyze_message,
@@ -47,12 +41,6 @@
          analysis = completion.choices[0].message.content.strip()
          return analysis

-     def _analyze(self, prompt_configs: dict[str, str], temperature: float) -> str:
-         analyze_prompt = prompt_configs["analyze_template"]
-         analyze_message = [self._build_user_message(analyze_prompt)]
-         analysis = self._analysis_completion(analyze_message, temperature)
-         return analysis
-
      def _parse_completion(
          self,
          message: list[dict[str, str]],
@@ -83,7 +71,6 @@
          temperature: float,
          logprobs: bool = False,
          top_logprobs: int = 3,
-         max_tokens: int | None = None,
      ) -> tuple[Type[T], Any]:
          json_schema = output_model.model_json_schema()

@@ -127,14 +114,12 @@
          Execute the LLM pipeline with the given input text.
          """
          prompt_loader = PromptLoader()
-         formatter = UserMergeFormatter()
+         formatter = Formatter()

          try:
-             cleaned_text = text.strip()
-
              prompt_configs = prompt_loader.load(
                  prompt_file=prompt_file,
-                 text=cleaned_text,
+                 text=text.strip(),
                  mode=mode,
                  **extra_kwargs,
              )
@@ -160,7 +145,7 @@
              )

              messages.append(self._build_user_message(prompt_configs["main_template"]))
-             messages = formatter.format(messages)
+             messages = formatter.user_merge_format(messages)

              if resp_format == "vllm":
                  parsed, completion = self._vllm_completion(
@@ -189,4 +174,4 @@

          except Exception as e:
              logger.error(f"TheTool failed: {e}")
-             return {"Error": str(e), "result": ""}
+             return {"error": str(e), "result": ""}

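The last hunk changes the failure payload's key from `"Error"` to `"error"`, so callers written against 1.0.7 need updating; a hedged sketch of the caller side (endpoint, key, and model are placeholders):

```python
from openai import OpenAI
from texttools import TheTool

the_tool = TheTool(client=OpenAI(base_url="your_url", api_key="your_api_key"), model="gpt-4o-mini")
out = the_tool.summarize("Some long text to summarize.")

# 1.0.7 returned {"Error": "...", "result": ""} on failure; 1.0.9 returns {"error": "...", "result": ""}
if out.get("error"):
    print(f"Tool failed: {out['error']}")
else:
    print(out["result"])
```
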
texttools/tools/internals/prompt_loader.py
@@ -18,24 +18,15 @@ class PromptLoader:
      }
      """

-     def __init__(self):
-         self.base_dir = Path(__file__).parent.parent.parent / Path("prompts")
-
      MAIN_TEMPLATE: str = "main_template"
      ANALYZE_TEMPLATE: str = "analyze_template"

      # Use lru_cache to load each file once
      @lru_cache(maxsize=32)
      def _load_templates(self, prompt_file: str, mode: str | None) -> dict[str, str]:
-         prompt_path = self.base_dir / prompt_file
-
-         if not prompt_path.exists():
-             raise FileNotFoundError(f"Prompt file not found: {prompt_path}")
-
-         try:
-             data = yaml.safe_load(prompt_path.read_text(encoding="utf-8"))
-         except yaml.YAMLError as e:
-             raise ValueError(f"Invalid YAML in {prompt_path}: {e}")
+         base_dir = Path(__file__).parent.parent.parent / Path("prompts")
+         prompt_path = base_dir / prompt_file
+         data = yaml.safe_load(prompt_path.read_text(encoding="utf-8"))

      return {
          self.MAIN_TEMPLATE: data[self.MAIN_TEMPLATE][mode]

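With the existence check and the YAML guard gone, a sketch of the exceptions that now propagate to callers (assuming `PromptLoader` is invoked directly; in 1.0.7 a missing file raised `FileNotFoundError` with a custom message and YAML problems were wrapped in `ValueError`):

```python
import yaml
from texttools.tools.internals.prompt_loader import PromptLoader

loader = PromptLoader()
try:
    templates = loader._load_templates("summarize.yaml", None)
except FileNotFoundError:
    # Now raised directly by Path.read_text for a missing prompt file
    raise
except yaml.YAMLError:
    # Now propagates unwrapped from yaml.safe_load
    raise
```
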
texttools/tools/the_tool.py
@@ -28,7 +28,6 @@ class TheTool:
          self,
          text: str,
          with_analysis: bool = False,
-         output_lang: str | None = None,
          user_prompt: str | None = None,
          temperature: float | None = 0.0,
          logprobs: bool = False,
@@ -37,20 +36,13 @@
          """
          Categorize a text into a single Islamic studies domain category.

-         Args:
-             text: Input string to categorize.
-             with_analysis: If True, first runs an LLM "analysis" step and
-                 conditions the main prompt on that analysis.
-
          Returns:
-             {"result": <category string>}
-             Example: {"result": "باورهای دینی"}
+             {"result": <category string>} + ("logprobs" and "analysis" if enabled)
          """
          return self.operator.run(
              # User parameters
              text=text,
              with_analysis=with_analysis,
-             output_lang=output_lang,
              user_prompt=user_prompt,
              temperature=temperature,
              logprobs=logprobs,
@@ -60,6 +52,7 @@
              output_model=OutputModels.CategorizerOutput,
              resp_format="parse",
              mode=None,
+             output_lang=None,
          )

      def extract_keywords(
@@ -75,12 +68,8 @@
          """
          Extract salient keywords from text.

-         Args:
-             text: Input string to analyze.
-             with_analysis: Whether to run an extra LLM reasoning step.
-
          Returns:
-             {"result": [<keyword1>, <keyword2>, ...]}
+             {"result": [<keyword1>, <keyword2>, ...]} + ("logprobs" and "analysis" if enabled)
          """
          return self.operator.run(
              # User parameters
@@ -111,12 +100,8 @@
          """
          Perform Named Entity Recognition (NER) over the input text.

-         Args:
-             text: Input string.
-             with_analysis: Whether to run an extra LLM reasoning step.
-
          Returns:
-             {"result": [{"text": <entity>, "type": <entity_type>}, ...]}
+             {"result": [{"text": <entity>, "type": <entity_type>}, ...]} + ("logprobs" and "analysis" if enabled)
          """
          return self.operator.run(
              # User parameters
@@ -146,12 +131,8 @@
          """
          Detect if the input is phrased as a question.

-         Args:
-             question: Input string to evaluate.
-             with_analysis: Whether to include an analysis step.
-
          Returns:
-             {"result": "true"} or {"result": "false"}
+             {"result": True} or {"result": False} + ("logprobs" and "analysis" if enabled)
          """
          return self.operator.run(
              # User parameters
@@ -182,12 +163,8 @@
          """
          Generate a single question from the given text.

-         Args:
-             text: Source text to derive a question from.
-             with_analysis: Whether to use analysis before generation.
-
          Returns:
-             {"result": <generated_question>}
+             {"result": <generated_question>} + ("logprobs" and "analysis" if enabled)
          """
          return self.operator.run(
              # User parameters
@@ -219,15 +196,8 @@
          """
          Merge multiple questions into a single unified question.

-         Args:
-             questions: List of question strings.
-             mode: Merge strategy:
-                 - "default": simple merging.
-                 - "reason": merging with reasoning explanation.
-             with_analysis: Whether to use an analysis step.
-
          Returns:
-             {"result": <merged_question>}
+             {"result": <merged_question>} + ("logprobs" and "analysis" if enabled)
          """
          text = ", ".join(text)
          return self.operator.run(
@@ -258,17 +228,10 @@
          mode: Literal["positive", "negative", "hard_negative"] = "positive",
      ) -> dict[str, str]:
          """
-         Rewrite a question with different wording or meaning.
-
-         Args:
-             question: Input question to rewrite.
-             mode: Rewrite strategy:
-                 - "positive": keep meaning, change words.
-                 - "negative": alter meaning, preserve wording style.
-             with_analysis: Whether to include an analysis step.
+         Rewrite a text with different modes.

          Returns:
-             {"result": <rewritten_question>}
+             {"result": <rewritten_text>} + ("logprobs" and "analysis" if enabled)
          """
          return self.operator.run(
              # User parameters
@@ -300,14 +263,8 @@
          """
          Generate a list of questions about a subject.

-         Args:
-             subject: Topic of interest.
-             number_of_questions: Number of questions to produce.
-             language: Target language for generated questions.
-             with_analysis: Whether to include an analysis step.
-
          Returns:
-             {"result": [<question1>, <question2>, ...]}
+             {"result": [<question1>, <question2>, ...]} + ("logprobs" and "analysis" if enabled)
          """
          return self.operator.run(
              # User parameters
@@ -339,12 +296,8 @@
          """
          Summarize the given subject text.

-         Args:
-             subject: Input text to summarize.
-             with_analysis: Whether to include an analysis step.
-
          Returns:
-             {"result": <summary>}
+             {"result": <summary>} + ("logprobs" and "analysis" if enabled)
          """
          return self.operator.run(
              # User parameters
@@ -367,7 +320,6 @@
          text: str,
          target_language: str,
          with_analysis: bool = False,
-         output_lang: str | None = None,
          user_prompt: str | None = None,
          temperature: float | None = 0.0,
          logprobs: bool = False,
@@ -376,20 +328,14 @@
          """
          Translate text between languages.

-         Args:
-             text: Input string to translate.
-             target_language: Language code or name to translate into.
-             with_analysis: Whether to include an analysis step.
-
          Returns:
-             {"result": <translated_text>}
+             {"result": <translated_text>} + ("logprobs" and "analysis" if enabled)
          """
          return self.operator.run(
              # User parameters
              text=text,
              target_language=target_language,
              with_analysis=with_analysis,
-             output_lang=output_lang,
              user_prompt=user_prompt,
              temperature=temperature,
              logprobs=logprobs,
@@ -399,6 +345,7 @@
              output_model=OutputModels.StrOutput,
              resp_format="parse",
              mode=None,
+             output_lang=None,
          )

      def run_custom(
@@ -413,10 +360,6 @@
          """
          Custom tool that can do almost anything!

-         Args:
-             prompt: Custom prompt.
-             output_model: Custom BaseModel output model.
-
          Returns:
              {"result": <Any>}
          """

texttools/formatters/base_formatter.py (deleted)
@@ -1,33 +0,0 @@
- from abc import ABC, abstractmethod
- from typing import Any
-
-
- class BaseFormatter(ABC):
-     """
-     Adapter to convert a conversation into a specific LLM API's input format.
-
-     Concrete implementations transform standardized messages (e.g., list[dict]) into the
-     exact payload required by a provider (e.g., OpenAI's message list, a single string, etc.).
-     """
-
-     @abstractmethod
-     def format(
-         self,
-         messages: Any,
-     ) -> Any:
-         """
-         Transform the input messages into a provider-specific payload.
-
-         Args:
-             messages: The input conversation. While often a list of dicts with
-                 'role' and 'content' keys, the exact type and structure may vary
-                 by implementation.
-
-         Returns:
-             A payload in the format expected by the target LLM API. This could be:
-             - A list of role-content dictionaries (e.g., for OpenAI)
-             - A single formatted string (e.g., for completion-style APIs)
-             - A complex dictionary with additional parameters
-             - Any other provider-specific data structure
-         """
-         pass

texttools/formatters/user_merge_formatter.py (deleted)
@@ -1,30 +0,0 @@
- from texttools.formatters.base_formatter import BaseFormatter
-
-
- class UserMergeFormatter(BaseFormatter):
-     """
-     Merges consecutive user messages into a single message, separated by newlines.
-
-     This is useful for condensing a multi-turn user input into a single coherent
-     message for the LLM. Assistant and system messages are left unchanged and
-     act as separators between user message groups.
-
-     Raises:
-         ValueError: If the input messages have invalid structure or roles.
-     """
-
-     def format(self, messages: list[dict[str, str]]) -> list[dict[str, str]]:
-         merged: list[dict[str, str]] = []
-
-         for message in messages:
-             role, content = message["role"], message["content"].strip()
-
-             # Merge with previous user turn
-             if merged and role == "user" and merged[-1]["role"] == "user":
-                 merged[-1]["content"] += "\n" + content
-
-             # Otherwise, start a new turn
-             else:
-                 merged.append({"role": role, "content": content})
-
-         return merged

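For downstream code that imported the deleted classes directly, the 1.0.9 replacement lives under `texttools.tools.internals.formatters`; a sketch of the migration:

```python
messages = [{"role": "user", "content": "Hello"}, {"role": "user", "content": "World"}]

# 1.0.7
# from texttools.formatters.user_merge_formatter import UserMergeFormatter
# merged = UserMergeFormatter().format(messages)

# 1.0.9
from texttools.tools.internals.formatters import Formatter
merged = Formatter.user_merge_format(messages)
print(merged)  # [{'role': 'user', 'content': 'Hello\nWorld'}]
```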