hamtaa-texttools 2.2.0__tar.gz → 2.3.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (42)
  1. {hamtaa_texttools-2.2.0 → hamtaa_texttools-2.3.1}/PKG-INFO +16 -3
  2. {hamtaa_texttools-2.2.0 → hamtaa_texttools-2.3.1}/README.md +14 -2
  3. {hamtaa_texttools-2.2.0 → hamtaa_texttools-2.3.1}/hamtaa_texttools.egg-info/PKG-INFO +16 -3
  4. {hamtaa_texttools-2.2.0 → hamtaa_texttools-2.3.1}/hamtaa_texttools.egg-info/requires.txt +1 -0
  5. {hamtaa_texttools-2.2.0 → hamtaa_texttools-2.3.1}/pyproject.toml +2 -1
  6. hamtaa_texttools-2.3.1/texttools/__init__.py +4 -0
  7. hamtaa_texttools-2.3.1/texttools/core/__init__.py +34 -0
  8. {hamtaa_texttools-2.2.0 → hamtaa_texttools-2.3.1}/texttools/core/internal_models.py +49 -0
  9. hamtaa_texttools-2.3.1/texttools/core/operators/__init__.py +4 -0
  10. {hamtaa_texttools-2.2.0 → hamtaa_texttools-2.3.1}/texttools/core/operators/async_operator.py +12 -3
  11. {hamtaa_texttools-2.2.0 → hamtaa_texttools-2.3.1}/texttools/core/operators/sync_operator.py +10 -3
  12. {hamtaa_texttools-2.2.0 → hamtaa_texttools-2.3.1}/texttools/core/utils.py +34 -2
  13. {hamtaa_texttools-2.2.0 → hamtaa_texttools-2.3.1}/texttools/models.py +4 -0
  14. {hamtaa_texttools-2.2.0 → hamtaa_texttools-2.3.1}/texttools/prompts/to_question.yaml +0 -2
  15. {hamtaa_texttools-2.2.0 → hamtaa_texttools-2.3.1}/texttools/prompts/translate.yaml +2 -2
  16. hamtaa_texttools-2.3.1/texttools/tools/__init__.py +5 -0
  17. {hamtaa_texttools-2.2.0 → hamtaa_texttools-2.3.1}/texttools/tools/async_tools.py +76 -18
  18. {hamtaa_texttools-2.2.0 → hamtaa_texttools-2.3.1}/texttools/tools/batch_tools.py +9 -0
  19. {hamtaa_texttools-2.2.0 → hamtaa_texttools-2.3.1}/texttools/tools/sync_tools.py +76 -18
  20. hamtaa_texttools-2.2.0/texttools/__init__.py +0 -6
  21. hamtaa_texttools-2.2.0/texttools/core/__init__.py +0 -0
  22. hamtaa_texttools-2.2.0/texttools/core/operators/__init__.py +0 -0
  23. hamtaa_texttools-2.2.0/texttools/tools/__init__.py +0 -0
  24. {hamtaa_texttools-2.2.0 → hamtaa_texttools-2.3.1}/LICENSE +0 -0
  25. {hamtaa_texttools-2.2.0 → hamtaa_texttools-2.3.1}/hamtaa_texttools.egg-info/SOURCES.txt +0 -0
  26. {hamtaa_texttools-2.2.0 → hamtaa_texttools-2.3.1}/hamtaa_texttools.egg-info/dependency_links.txt +0 -0
  27. {hamtaa_texttools-2.2.0 → hamtaa_texttools-2.3.1}/hamtaa_texttools.egg-info/top_level.txt +0 -0
  28. {hamtaa_texttools-2.2.0 → hamtaa_texttools-2.3.1}/setup.cfg +0 -0
  29. {hamtaa_texttools-2.2.0 → hamtaa_texttools-2.3.1}/tests/test_category_tree.py +0 -0
  30. {hamtaa_texttools-2.2.0 → hamtaa_texttools-2.3.1}/tests/test_to_chunks.py +0 -0
  31. {hamtaa_texttools-2.2.0 → hamtaa_texttools-2.3.1}/texttools/core/exceptions.py +0 -0
  32. {hamtaa_texttools-2.2.0 → hamtaa_texttools-2.3.1}/texttools/prompts/augment.yaml +0 -0
  33. {hamtaa_texttools-2.2.0 → hamtaa_texttools-2.3.1}/texttools/prompts/categorize.yaml +0 -0
  34. {hamtaa_texttools-2.2.0 → hamtaa_texttools-2.3.1}/texttools/prompts/extract_entities.yaml +0 -0
  35. {hamtaa_texttools-2.2.0 → hamtaa_texttools-2.3.1}/texttools/prompts/extract_keywords.yaml +0 -0
  36. {hamtaa_texttools-2.2.0 → hamtaa_texttools-2.3.1}/texttools/prompts/is_fact.yaml +0 -0
  37. {hamtaa_texttools-2.2.0 → hamtaa_texttools-2.3.1}/texttools/prompts/is_question.yaml +0 -0
  38. {hamtaa_texttools-2.2.0 → hamtaa_texttools-2.3.1}/texttools/prompts/merge_questions.yaml +0 -0
  39. {hamtaa_texttools-2.2.0 → hamtaa_texttools-2.3.1}/texttools/prompts/propositionize.yaml +0 -0
  40. {hamtaa_texttools-2.2.0 → hamtaa_texttools-2.3.1}/texttools/prompts/run_custom.yaml +0 -0
  41. {hamtaa_texttools-2.2.0 → hamtaa_texttools-2.3.1}/texttools/prompts/summarize.yaml +0 -0
  42. {hamtaa_texttools-2.2.0 → hamtaa_texttools-2.3.1}/texttools/py.typed +0 -0
--- hamtaa_texttools-2.2.0/PKG-INFO
+++ hamtaa_texttools-2.3.1/PKG-INFO
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: hamtaa-texttools
- Version: 2.2.0
+ Version: 2.3.1
  Summary: A high-level NLP toolkit built on top of modern LLMs.
  Author-email: Tohidi <the.mohammad.tohidi@gmail.com>, Erfan Moosavi <erfanmoosavi84@gmail.com>, Montazer <montazerh82@gmail.com>, Givechi <mohamad.m.givechi@gmail.com>, Zareshahi <a.zareshahi1377@gmail.com>
  Maintainer-email: Erfan Moosavi <erfanmoosavi84@gmail.com>, Tohidi <the.mohammad.tohidi@gmail.com>
@@ -17,6 +17,7 @@ License-File: LICENSE
  Requires-Dist: dotenv>=0.9.9
  Requires-Dist: openai>=1.97.1
  Requires-Dist: pydantic>=2.0.0
+ Requires-Dist: pytest>=9.0.2
  Requires-Dist: pyyaml>=6.0
  Dynamic: license-file

@@ -108,20 +109,32 @@ pip install -U hamtaa-texttools
  ## 🧩 ToolOutput

  Every tool of `TextTools` returns a `ToolOutput` object which is a BaseModel with attributes:
+
  - **`result: Any`**
  - **`analysis: str`**
  - **`logprobs: list`**
  - **`errors: list[str]`**
- - **`ToolOutputMetadata`**
+ - **`ToolOutputMetadata`**
  - **`tool_name: str`**
+ - **`processed_by: str`**
  - **`processed_at: datetime`**
  - **`execution_time: float`**
+ - **`token_usage: TokenUsage`**
+ - **`completion_usage: CompletionUsage`**
+ - **`prompt_tokens: int`**
+ - **`completion_tokens: int`**
+ - **`total_tokens: int`**
+ - **`analyze_usage: AnalyzeUsage`**
+ - **`prompt_tokens: int`**
+ - **`completion_tokens: int`**
+ - **`total_tokens: int`**
+ - **`total_tokens: int`**

  - Serialize output to JSON using the `to_json()` method.
  - Verify operation success with the `is_successful()` method.
  - Convert output to a dictionary with the `to_dict()` method.

- **Note:** For BatchTheTool: Each method returns a list[ToolOutput] containing results for all input texts.
+ **Note:** For BatchTheTool: Each method returns a `list[ToolOutput]` containing results for all input texts.

  ---

--- hamtaa_texttools-2.2.0/README.md
+++ hamtaa_texttools-2.3.1/README.md
@@ -86,20 +86,32 @@ pip install -U hamtaa-texttools
  ## 🧩 ToolOutput

  Every tool of `TextTools` returns a `ToolOutput` object which is a BaseModel with attributes:
+
  - **`result: Any`**
  - **`analysis: str`**
  - **`logprobs: list`**
  - **`errors: list[str]`**
- - **`ToolOutputMetadata`**
+ - **`ToolOutputMetadata`**
  - **`tool_name: str`**
+ - **`processed_by: str`**
  - **`processed_at: datetime`**
  - **`execution_time: float`**
+ - **`token_usage: TokenUsage`**
+ - **`completion_usage: CompletionUsage`**
+ - **`prompt_tokens: int`**
+ - **`completion_tokens: int`**
+ - **`total_tokens: int`**
+ - **`analyze_usage: AnalyzeUsage`**
+ - **`prompt_tokens: int`**
+ - **`completion_tokens: int`**
+ - **`total_tokens: int`**
+ - **`total_tokens: int`**

  - Serialize output to JSON using the `to_json()` method.
  - Verify operation success with the `is_successful()` method.
  - Convert output to a dictionary with the `to_dict()` method.

- **Note:** For BatchTheTool: Each method returns a list[ToolOutput] containing results for all input texts.
+ **Note:** For BatchTheTool: Each method returns a `list[ToolOutput]` containing results for all input texts.

  ---

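The attribute list in the updated README maps directly onto what callers see at runtime. A minimal illustrative sketch, assuming the metadata object is exposed as `.metadata`; the tool method name is a placeholder, not taken from this diff:

```python
from openai import OpenAI
from texttools import TheTool

tool = TheTool(client=OpenAI(api_key="..."), model="gpt-4o-mini")  # placeholder credentials/model

output = tool.summarize("Some long input text...")  # hypothetical method name

if output.is_successful():
    print(output.result)                             # tool-specific payload
    print(output.metadata.processed_by)              # model name, new in 2.3.1
    print(output.metadata.token_usage.total_tokens)  # aggregated token count, new in 2.3.1
else:
    print(output.errors)

print(output.to_json())  # serialize everything, including the nested metadata
```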
--- hamtaa_texttools-2.2.0/hamtaa_texttools.egg-info/PKG-INFO
+++ hamtaa_texttools-2.3.1/hamtaa_texttools.egg-info/PKG-INFO
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: hamtaa-texttools
- Version: 2.2.0
+ Version: 2.3.1
  Summary: A high-level NLP toolkit built on top of modern LLMs.
  Author-email: Tohidi <the.mohammad.tohidi@gmail.com>, Erfan Moosavi <erfanmoosavi84@gmail.com>, Montazer <montazerh82@gmail.com>, Givechi <mohamad.m.givechi@gmail.com>, Zareshahi <a.zareshahi1377@gmail.com>
  Maintainer-email: Erfan Moosavi <erfanmoosavi84@gmail.com>, Tohidi <the.mohammad.tohidi@gmail.com>
@@ -17,6 +17,7 @@ License-File: LICENSE
  Requires-Dist: dotenv>=0.9.9
  Requires-Dist: openai>=1.97.1
  Requires-Dist: pydantic>=2.0.0
+ Requires-Dist: pytest>=9.0.2
  Requires-Dist: pyyaml>=6.0
  Dynamic: license-file

@@ -108,20 +109,32 @@ pip install -U hamtaa-texttools
  ## 🧩 ToolOutput

  Every tool of `TextTools` returns a `ToolOutput` object which is a BaseModel with attributes:
+
  - **`result: Any`**
  - **`analysis: str`**
  - **`logprobs: list`**
  - **`errors: list[str]`**
- - **`ToolOutputMetadata`**
+ - **`ToolOutputMetadata`**
  - **`tool_name: str`**
+ - **`processed_by: str`**
  - **`processed_at: datetime`**
  - **`execution_time: float`**
+ - **`token_usage: TokenUsage`**
+ - **`completion_usage: CompletionUsage`**
+ - **`prompt_tokens: int`**
+ - **`completion_tokens: int`**
+ - **`total_tokens: int`**
+ - **`analyze_usage: AnalyzeUsage`**
+ - **`prompt_tokens: int`**
+ - **`completion_tokens: int`**
+ - **`total_tokens: int`**
+ - **`total_tokens: int`**

  - Serialize output to JSON using the `to_json()` method.
  - Verify operation success with the `is_successful()` method.
  - Convert output to a dictionary with the `to_dict()` method.

- **Note:** For BatchTheTool: Each method returns a list[ToolOutput] containing results for all input texts.
+ **Note:** For BatchTheTool: Each method returns a `list[ToolOutput]` containing results for all input texts.

  ---

--- hamtaa_texttools-2.2.0/hamtaa_texttools.egg-info/requires.txt
+++ hamtaa_texttools-2.3.1/hamtaa_texttools.egg-info/requires.txt
@@ -1,4 +1,5 @@
  dotenv>=0.9.9
  openai>=1.97.1
  pydantic>=2.0.0
+ pytest>=9.0.2
  pyyaml>=6.0
--- hamtaa_texttools-2.2.0/pyproject.toml
+++ hamtaa_texttools-2.3.1/pyproject.toml
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"

  [project]
  name = "hamtaa-texttools"
- version = "2.2.0"
+ version = "2.3.1"
  authors = [
  {name = "Tohidi", email = "the.mohammad.tohidi@gmail.com"},
  {name = "Erfan Moosavi", email = "erfanmoosavi84@gmail.com"},
@@ -24,6 +24,7 @@ dependencies = [
  "dotenv>=0.9.9",
  "openai>=1.97.1",
  "pydantic>=2.0.0",
+ "pytest>=9.0.2",
  "pyyaml>=6.0",
  ]
  keywords = ["nlp", "llm", "text-processing", "openai"]
--- /dev/null
+++ hamtaa_texttools-2.3.1/texttools/__init__.py
@@ -0,0 +1,4 @@
+ from .models import CategoryTree
+ from .tools import AsyncTheTool, BatchTheTool, TheTool
+
+ __all__ = ["CategoryTree", "AsyncTheTool", "BatchTheTool", "TheTool"]
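With the rewritten package `__init__`, the public names are importable from the top level. A short sketch; the client credentials and model name are placeholders:

```python
from openai import OpenAI
from texttools import AsyncTheTool, BatchTheTool, CategoryTree, TheTool

client = OpenAI(api_key="...")                      # placeholder key
tool = TheTool(client=client, model="gpt-4o-mini")  # any chat-completions model name
```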
--- /dev/null
+++ hamtaa_texttools-2.3.1/texttools/core/__init__.py
@@ -0,0 +1,34 @@
+ from .exceptions import LLMError, PromptError, TextToolsError, ValidationError
+ from .internal_models import (
+ Bool,
+ ListDictStrStr,
+ ListStr,
+ ReasonListStr,
+ Str,
+ TokenUsage,
+ create_dynamic_model,
+ )
+ from .operators import AsyncOperator, Operator
+ from .utils import OperatorUtils, TheToolUtils
+
+ __all__ = [
+ # Exceptions
+ "LLMError",
+ "PromptError",
+ "TextToolsError",
+ "ValidationError",
+ # Internal models
+ "Bool",
+ "ListDictStrStr",
+ "ListStr",
+ "ReasonListStr",
+ "Str",
+ "TokenUsage",
+ "create_dynamic_model",
+ # Operators
+ "AsyncOperator",
+ "Operator",
+ # Utils
+ "OperatorUtils",
+ "TheToolUtils",
+ ]
--- hamtaa_texttools-2.2.0/texttools/core/internal_models.py
+++ hamtaa_texttools-2.3.1/texttools/core/internal_models.py
@@ -1,12 +1,61 @@
+ from __future__ import annotations
+
  from typing import Any, Literal

  from pydantic import BaseModel, Field, create_model


+ class CompletionUsage(BaseModel):
+ prompt_tokens: int = 0
+ completion_tokens: int = 0
+ total_tokens: int = 0
+
+
+ class AnalyzeUsage(BaseModel):
+ prompt_tokens: int = 0
+ completion_tokens: int = 0
+ total_tokens: int = 0
+
+
+ class TokenUsage(BaseModel):
+ completion_usage: CompletionUsage = CompletionUsage()
+ analyze_usage: AnalyzeUsage = AnalyzeUsage()
+ total_tokens: int = 0
+
+ def __add__(self, other: TokenUsage) -> TokenUsage:
+ new_completion_usage = CompletionUsage(
+ prompt_tokens=self.completion_usage.prompt_tokens
+ + other.completion_usage.prompt_tokens,
+ completion_tokens=self.completion_usage.completion_tokens
+ + other.completion_usage.completion_tokens,
+ total_tokens=self.completion_usage.total_tokens
+ + other.completion_usage.total_tokens,
+ )
+ new_analyze_usage = AnalyzeUsage(
+ prompt_tokens=self.analyze_usage.prompt_tokens
+ + other.analyze_usage.prompt_tokens,
+ completion_tokens=self.analyze_usage.completion_tokens
+ + other.analyze_usage.completion_tokens,
+ total_tokens=self.analyze_usage.total_tokens
+ + other.analyze_usage.total_tokens,
+ )
+ total_tokens = (
+ new_completion_usage.total_tokens + new_analyze_usage.total_tokens
+ )
+
+ return TokenUsage(
+ completion_usage=new_completion_usage,
+ analyze_usage=new_analyze_usage,
+ total_tokens=total_tokens,
+ )
+
+
  class OperatorOutput(BaseModel):
  result: Any
  analysis: str | None
  logprobs: list[dict[str, Any]] | None
+ processed_by: str
+ token_usage: TokenUsage | None = None


  class Str(BaseModel):
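The new `TokenUsage.__add__` is what lets the tools accumulate usage across multiple LLM calls (translation chunks, category-tree levels). A small sketch with invented numbers:

```python
from texttools.core.internal_models import AnalyzeUsage, CompletionUsage, TokenUsage

a = TokenUsage(
    completion_usage=CompletionUsage(prompt_tokens=100, completion_tokens=20, total_tokens=120),
    analyze_usage=AnalyzeUsage(prompt_tokens=30, completion_tokens=10, total_tokens=40),
    total_tokens=160,
)
b = TokenUsage(
    completion_usage=CompletionUsage(prompt_tokens=50, completion_tokens=5, total_tokens=55),
    total_tokens=55,
)

combined = a + b
print(combined.completion_usage.total_tokens)  # 175
print(combined.analyze_usage.total_tokens)     # 40
print(combined.total_tokens)                   # 215 = 175 + 40
```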
--- /dev/null
+++ hamtaa_texttools-2.3.1/texttools/core/operators/__init__.py
@@ -0,0 +1,4 @@
+ from .async_operator import AsyncOperator
+ from .sync_operator import Operator
+
+ __all__ = ["AsyncOperator", "Operator"]
--- hamtaa_texttools-2.2.0/texttools/core/operators/async_operator.py
+++ hamtaa_texttools-2.3.1/texttools/core/operators/async_operator.py
@@ -18,7 +18,9 @@ class AsyncOperator:
  self._client = client
  self._model = model

- async def _analyze_completion(self, analyze_message: list[dict[str, str]]) -> str:
+ async def _analyze_completion(
+ self, analyze_message: list[dict[str, str]]
+ ) -> tuple[str, Any]:
  try:
  completion = await self._client.chat.completions.create(
  model=self._model,
@@ -33,7 +35,7 @@
  if not analysis:
  raise LLMError("Empty analysis response")

- return analysis
+ return analysis, completion

  except Exception as e:
  if isinstance(e, (PromptError, LLMError)):
@@ -116,12 +118,15 @@
  )

  analysis: str | None = None
+ analyze_completion: Any = None

  if with_analysis:
  analyze_message = OperatorUtils.build_message(
  prompt_configs["analyze_template"]
  )
- analysis = await self._analyze_completion(analyze_message)
+ analysis, analyze_completion = await self._analyze_completion(
+ analyze_message
+ )

  main_prompt = OperatorUtils.build_main_prompt(
  prompt_configs["main_template"], analysis, output_lang, user_prompt
@@ -176,6 +181,10 @@
  logprobs=OperatorUtils.extract_logprobs(completion)
  if logprobs
  else None,
+ processed_by=self._model,
+ token_usage=OperatorUtils.extract_token_usage(
+ completion, analyze_completion
+ ),
  )

  return operator_output
--- hamtaa_texttools-2.2.0/texttools/core/operators/sync_operator.py
+++ hamtaa_texttools-2.3.1/texttools/core/operators/sync_operator.py
@@ -18,7 +18,9 @@ class Operator:
  self._client = client
  self._model = model

- def _analyze_completion(self, analyze_message: list[dict[str, str]]) -> str:
+ def _analyze_completion(
+ self, analyze_message: list[dict[str, str]]
+ ) -> tuple[str, Any]:
  try:
  completion = self._client.chat.completions.create(
  model=self._model,
@@ -33,7 +35,7 @@
  if not analysis:
  raise LLMError("Empty analysis response")

- return analysis
+ return analysis, completion

  except Exception as e:
  if isinstance(e, (PromptError, LLMError)):
@@ -114,12 +116,13 @@
  )

  analysis: str | None = None
+ analyze_completion: Any = None

  if with_analysis:
  analyze_message = OperatorUtils.build_message(
  prompt_configs["analyze_template"]
  )
- analysis = self._analyze_completion(analyze_message)
+ analysis, analyze_completion = self._analyze_completion(analyze_message)

  main_prompt = OperatorUtils.build_main_prompt(
  prompt_configs["main_template"], analysis, output_lang, user_prompt
@@ -174,6 +177,10 @@
  logprobs=OperatorUtils.extract_logprobs(completion)
  if logprobs
  else None,
+ processed_by=self._model,
+ token_usage=OperatorUtils.extract_token_usage(
+ completion, analyze_completion
+ ),
  )

  return operator_output
--- hamtaa_texttools-2.2.0/texttools/core/utils.py
+++ hamtaa_texttools-2.3.1/texttools/core/utils.py
@@ -9,6 +9,7 @@ from typing import Any
  import yaml

  from .exceptions import PromptError
+ from .internal_models import AnalyzeUsage, CompletionUsage, TokenUsage


  class OperatorUtils:
@@ -109,13 +110,12 @@
  return [{"role": "user", "content": prompt}]

  @staticmethod
- def extract_logprobs(completion: Any) -> list[dict]:
+ def extract_logprobs(completion: Any) -> list[dict[str, Any]]:
  """
  Extracts and filters logprobs from completion.
  Skips punctuation and structural tokens.
  """
  logprobs_data = []
-
  ignore_pattern = re.compile(r'^(result|[\s\[\]\{\}",:]+)$')

  for choice in completion.choices:
@@ -148,6 +148,38 @@
  new_temp = base_temp + random.choice([-1, 1]) * random.uniform(0.1, 0.9)
  return max(0.0, min(new_temp, 1.5))

+ @staticmethod
+ def extract_token_usage(completion: Any, analyze_completion: Any) -> TokenUsage:
+ completion_usage = completion.usage
+ analyze_usage = analyze_completion.usage if analyze_completion else None
+
+ completion_usage_model = CompletionUsage(
+ prompt_tokens=getattr(completion_usage, "prompt_tokens", 00),
+ completion_tokens=getattr(completion_usage, "completion_tokens", 00),
+ total_tokens=getattr(completion_usage, "total_tokens", 00),
+ )
+ analyze_usage_model = AnalyzeUsage(
+ prompt_tokens=getattr(analyze_usage, "prompt_tokens", 0),
+ completion_tokens=getattr(analyze_usage, "completion_tokens", 0),
+ total_tokens=getattr(analyze_usage, "total_tokens", 0),
+ )
+ total_analyze_tokens = (
+ analyze_usage_model.prompt_tokens + analyze_usage_model.completion_tokens
+ if analyze_completion
+ else 0
+ )
+ total_tokens = (
+ completion_usage_model.prompt_tokens
+ + completion_usage_model.completion_tokens
+ + total_analyze_tokens
+ )
+
+ return TokenUsage(
+ completion_usage=completion_usage_model,
+ analyze_usage=analyze_usage_model,
+ total_tokens=total_tokens,
+ )
+


  class TheToolUtils:
  """
--- hamtaa_texttools-2.2.0/texttools/models.py
+++ hamtaa_texttools-2.3.1/texttools/models.py
@@ -5,11 +5,15 @@ from typing import Any

  from pydantic import BaseModel, Field

+ from .core import TokenUsage
+

  class ToolOutputMetadata(BaseModel):
  tool_name: str
+ processed_by: str | None = None
  processed_at: datetime = Field(default_factory=datetime.now)
  execution_time: float | None = None
+ token_usage: TokenUsage | None = None


  class ToolOutput(BaseModel):
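The extra metadata fields serialize with the rest of the model. A brief sketch with invented values (the tool name and model string are arbitrary examples):

```python
from texttools.core import TokenUsage
from texttools.models import ToolOutputMetadata

meta = ToolOutputMetadata(
    tool_name="summarize",        # example label only
    processed_by="gpt-4o-mini",   # model that produced the result
    execution_time=1.42,
    token_usage=TokenUsage(total_tokens=137),
)
print(meta.model_dump())          # pydantic v2 dump, includes the nested token_usage
```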
--- hamtaa_texttools-2.2.0/texttools/prompts/to_question.yaml
+++ hamtaa_texttools-2.3.1/texttools/prompts/to_question.yaml
@@ -7,7 +7,6 @@ main_template:
  and must not mention any verbs like this, that, he or she in the question.

  There is a `reason` key, fill that up with a summerized version of your thoughts.
- The `reason` must be less than 20 words.
  Don't forget to fill the reason.

  Respond only in JSON format:
@@ -23,7 +22,6 @@ main_template:
  and must not mention any verbs like this, that, he or she in the question.

  There is a `reason` key, fill that up with a summerized version of your thoughts.
- The `reason` must be less than 20 words.
  Don't forget to fill the reason.

  Respond only in JSON format:
--- hamtaa_texttools-2.2.0/texttools/prompts/translate.yaml
+++ hamtaa_texttools-2.3.1/texttools/prompts/translate.yaml
@@ -3,9 +3,9 @@ main_template: |
  Output only the translated text.

  Respond only in JSON format:
- {{"result": "string"}}
+ {{"result": "translated_text"}}

- Don't translate proper name, only transliterate them to {target_lang}
+ Don't translate proper names, only transliterate them to {target_lang}

  Translate the following text to {target_lang}:
  {text}
--- /dev/null
+++ hamtaa_texttools-2.3.1/texttools/tools/__init__.py
@@ -0,0 +1,5 @@
+ from .async_tools import AsyncTheTool
+ from .batch_tools import BatchTheTool
+ from .sync_tools import TheTool
+
+ __all__ = ["AsyncTheTool", "BatchTheTool", "TheTool"]
--- hamtaa_texttools-2.2.0/texttools/tools/async_tools.py
+++ hamtaa_texttools-2.3.1/texttools/tools/async_tools.py
@@ -5,17 +5,21 @@ from typing import Any, Literal

  from openai import AsyncOpenAI

- from ..core.exceptions import LLMError, PromptError, TextToolsError, ValidationError
- from ..core.internal_models import (
+ from ..core import (
+ AsyncOperator,
  Bool,
  ListDictStrStr,
  ListStr,
+ LLMError,
+ PromptError,
  ReasonListStr,
  Str,
+ TextToolsError,
+ TheToolUtils,
+ TokenUsage,
+ ValidationError,
  create_dynamic_model,
  )
- from ..core.operators.async_operator import AsyncOperator
- from ..core.utils import TheToolUtils
  from ..models import CategoryTree, ToolOutput, ToolOutputMetadata


@@ -26,6 +30,14 @@ class AsyncTheTool:
  model: str,
  raise_on_error: bool = True,
  ):
+ """
+ Initialize the AsyncTheTool instance.
+
+ Args:
+ client: An AsyncOpenAI client instance for making asynchronous API calls
+ model: The name of the model
+ raise_on_error: If True, raises exceptions on errors; if False, logs errors and continues
+ """
  self._operator = AsyncOperator(client=client, model=model)
  self.logger = logging.getLogger(self.__class__.__name__)
  self.raise_on_error = raise_on_error
@@ -91,7 +103,10 @@
  )

  metadata = ToolOutputMetadata(
- tool_name=tool_name, execution_time=perf_counter() - start
+ tool_name=tool_name,
+ execution_time=perf_counter() - start,
+ processed_by=operator_output.processed_by,
+ token_usage=operator_output.token_usage,
  )
  tool_output = ToolOutput(
  result=operator_output.result,
@@ -106,6 +121,7 @@
  final_categories = []
  analysis = ""
  logprobs_list = []
+ token_usage = TokenUsage()

  for _ in range(levels):
  if not parent_node.children:
@@ -149,9 +165,13 @@
  analysis += level_operator_output.analysis
  if logprobs:
  logprobs_list.extend(level_operator_output.logprobs)
+ token_usage += level_operator_output.token_usage

  metadata = ToolOutputMetadata(
- tool_name=tool_name, execution_time=(perf_counter() - start)
+ tool_name=tool_name,
+ execution_time=perf_counter() - start,
+ processed_by=level_operator_output.processed_by,
+ token_usage=token_usage,
  )
  tool_output = ToolOutput(
  result=final_categories,
@@ -237,7 +257,10 @@
  )

  metadata = ToolOutputMetadata(
- tool_name=tool_name, execution_time=perf_counter() - start
+ tool_name=tool_name,
+ execution_time=perf_counter() - start,
+ processed_by=operator_output.processed_by,
+ token_usage=operator_output.token_usage,
  )
  tool_output = ToolOutput(
  result=operator_output.result,
@@ -321,7 +344,10 @@
  )

  metadata = ToolOutputMetadata(
- tool_name=tool_name, execution_time=perf_counter() - start
+ tool_name=tool_name,
+ execution_time=perf_counter() - start,
+ processed_by=operator_output.processed_by,
+ token_usage=operator_output.token_usage,
  )
  tool_output = ToolOutput(
  result=operator_output.result,
@@ -400,7 +426,10 @@
  )

  metadata = ToolOutputMetadata(
- tool_name=tool_name, execution_time=perf_counter() - start
+ tool_name=tool_name,
+ execution_time=perf_counter() - start,
+ processed_by=operator_output.processed_by,
+ token_usage=operator_output.token_usage,
  )
  tool_output = ToolOutput(
  result=operator_output.result,
@@ -486,7 +515,10 @@
  )

  metadata = ToolOutputMetadata(
- tool_name=tool_name, execution_time=perf_counter() - start
+ tool_name=tool_name,
+ execution_time=perf_counter() - start,
+ processed_by=operator_output.processed_by,
+ token_usage=operator_output.token_usage,
  )
  tool_output = ToolOutput(
  result=operator_output.result,
@@ -570,7 +602,10 @@
  )

  metadata = ToolOutputMetadata(
- tool_name=tool_name, execution_time=perf_counter() - start
+ tool_name=tool_name,
+ execution_time=perf_counter() - start,
+ processed_by=operator_output.processed_by,
+ token_usage=operator_output.token_usage,
  )
  tool_output = ToolOutput(
  result=operator_output.result,
@@ -653,7 +688,10 @@
  )

  metadata = ToolOutputMetadata(
- tool_name=tool_name, execution_time=perf_counter() - start
+ tool_name=tool_name,
+ execution_time=perf_counter() - start,
+ processed_by=operator_output.processed_by,
+ token_usage=operator_output.token_usage,
  )
  tool_output = ToolOutput(
  result=operator_output.result,
@@ -734,7 +772,10 @@
  )

  metadata = ToolOutputMetadata(
- tool_name=tool_name, execution_time=perf_counter() - start
+ tool_name=tool_name,
+ execution_time=perf_counter() - start,
+ processed_by=operator_output.processed_by,
+ token_usage=operator_output.token_usage,
  )
  tool_output = ToolOutput(
  result=operator_output.result,
@@ -802,6 +843,7 @@
  translation = ""
  analysis = ""
  logprobs_list = []
+ token_usage = TokenUsage()

  for chunk in chunks:
  chunk_operator_output = await TheToolUtils.run_with_timeout(
@@ -832,9 +874,13 @@
  analysis += chunk_operator_output.analysis
  if logprobs:
  logprobs_list.extend(chunk_operator_output.logprobs)
+ token_usage += chunk_operator_output.token_usage

  metadata = ToolOutputMetadata(
- tool_name=tool_name, execution_time=perf_counter() - start
+ tool_name=tool_name,
+ execution_time=perf_counter() - start,
+ processed_by=chunk_operator_output.processed_by,
+ token_usage=token_usage,
  )
  tool_output = ToolOutput(
  result=translation,
@@ -867,7 +913,10 @@
  )

  metadata = ToolOutputMetadata(
- tool_name=tool_name, execution_time=perf_counter() - start
+ tool_name=tool_name,
+ execution_time=perf_counter() - start,
+ processed_by=operator_output.processed_by,
+ token_usage=operator_output.token_usage,
  )
  tool_output = ToolOutput(
  result=operator_output.result,
@@ -950,7 +999,10 @@
  )

  metadata = ToolOutputMetadata(
- tool_name=tool_name, execution_time=perf_counter() - start
+ tool_name=tool_name,
+ execution_time=perf_counter() - start,
+ processed_by=operator_output.processed_by,
+ token_usage=operator_output.token_usage,
  )
  tool_output = ToolOutput(
  result=operator_output.result,
@@ -1036,7 +1088,10 @@
  )

  metadata = ToolOutputMetadata(
- tool_name=tool_name, execution_time=perf_counter() - start
+ tool_name=tool_name,
+ execution_time=perf_counter() - start,
+ processed_by=operator_output.processed_by,
+ token_usage=operator_output.token_usage,
  )
  tool_output = ToolOutput(
  result=operator_output.result,
@@ -1121,7 +1176,10 @@
  )

  metadata = ToolOutputMetadata(
- tool_name=tool_name, execution_time=perf_counter() - start
+ tool_name=tool_name,
+ execution_time=perf_counter() - start,
+ processed_by=operator_output.processed_by,
+ token_usage=operator_output.token_usage,
  )
  tool_output = ToolOutput(
  result=operator_output.result,
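A minimal async sketch using the constructor documented above; the model name is a placeholder and the method call is hypothetical, since this diff does not show the tool method signatures:

```python
import asyncio

from openai import AsyncOpenAI
from texttools import AsyncTheTool


async def main() -> None:
    client = AsyncOpenAI(api_key="...")  # placeholder credentials
    tool = AsyncTheTool(client=client, model="gpt-4o-mini", raise_on_error=False)

    output = await tool.summarize("Some long input text...")  # hypothetical method name
    if output.is_successful():
        print(output.result)


asyncio.run(main())
```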
--- hamtaa_texttools-2.2.0/texttools/tools/batch_tools.py
+++ hamtaa_texttools-2.3.1/texttools/tools/batch_tools.py
@@ -15,6 +15,15 @@ class BatchTheTool:
  raise_on_error: bool = True,
  max_concurrency: int = 5,
  ):
+ """
+ Initialize the BatchTheTool instance.
+
+ Arguments:
+ client: An AsyncOpenAI client instance for making asynchronous API calls
+ model: The name of the model
+ raise_on_error: If True, raises exceptions on errors; if False, logs errors and continues
+ max_concurrency: Maximum number of concurrent API requests allowed
+ """
  self.tool = AsyncTheTool(client, model, raise_on_error)
  self.semaphore = asyncio.Semaphore(max_concurrency)

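`BatchTheTool` wraps `AsyncTheTool` behind an `asyncio.Semaphore`, so construction mirrors the async tool. A short sketch (all values are placeholders):

```python
from openai import AsyncOpenAI
from texttools import BatchTheTool

client = AsyncOpenAI(api_key="...")  # placeholder credentials
batch = BatchTheTool(
    client=client,
    model="gpt-4o-mini",   # any chat-completions model name
    raise_on_error=False,
    max_concurrency=5,     # at most 5 requests in flight at once
)
# Per the README note, each batch method returns a list[ToolOutput], one entry
# per input text; the concrete method names are not shown in this diff.
```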
--- hamtaa_texttools-2.2.0/texttools/tools/sync_tools.py
+++ hamtaa_texttools-2.3.1/texttools/tools/sync_tools.py
@@ -5,17 +5,21 @@ from typing import Any, Literal

  from openai import OpenAI

- from ..core.exceptions import LLMError, PromptError, TextToolsError, ValidationError
- from ..core.internal_models import (
+ from ..core import (
  Bool,
  ListDictStrStr,
  ListStr,
+ LLMError,
+ Operator,
+ PromptError,
  ReasonListStr,
  Str,
+ TextToolsError,
+ TheToolUtils,
+ TokenUsage,
+ ValidationError,
  create_dynamic_model,
  )
- from ..core.operators.sync_operator import Operator
- from ..core.utils import TheToolUtils
  from ..models import CategoryTree, ToolOutput, ToolOutputMetadata


@@ -26,6 +30,14 @@ class TheTool:
  model: str,
  raise_on_error: bool = True,
  ):
+ """
+ Initialize the TheTool instance.
+
+ Args:
+ client: An OpenAI client instance for making API calls
+ model: The name of the model
+ raise_on_error: If True, raises exceptions on errors; if False, logs errors and continues
+ """
  self._operator = Operator(client=client, model=model)
  self.logger = logging.getLogger(self.__class__.__name__)
  self.raise_on_error = raise_on_error
@@ -86,7 +98,10 @@
  )

  metadata = ToolOutputMetadata(
- tool_name=tool_name, execution_time=perf_counter() - start
+ tool_name=tool_name,
+ execution_time=perf_counter() - start,
+ processed_by=operator_output.processed_by,
+ token_usage=operator_output.token_usage,
  )
  tool_output = ToolOutput(
  result=operator_output.result,
@@ -101,6 +116,7 @@
  final_categories = []
  analysis = ""
  logprobs_list = []
+ token_usage = TokenUsage()

  for _ in range(levels):
  if not parent_node.children:
@@ -141,9 +157,13 @@
  analysis += level_operator_output.analysis
  if logprobs:
  logprobs_list.extend(level_operator_output.logprobs)
+ token_usage += level_operator_output.token_usage

  metadata = ToolOutputMetadata(
- tool_name=tool_name, execution_time=(perf_counter() - start)
+ tool_name=tool_name,
+ execution_time=perf_counter() - start,
+ processed_by=level_operator_output.processed_by,
+ token_usage=token_usage,
  )
  tool_output = ToolOutput(
  result=final_categories,
@@ -224,7 +244,10 @@
  )

  metadata = ToolOutputMetadata(
- tool_name=tool_name, execution_time=perf_counter() - start
+ tool_name=tool_name,
+ execution_time=perf_counter() - start,
+ processed_by=operator_output.processed_by,
+ token_usage=operator_output.token_usage,
  )
  tool_output = ToolOutput(
  result=operator_output.result,
@@ -303,7 +326,10 @@
  )

  metadata = ToolOutputMetadata(
- tool_name=tool_name, execution_time=perf_counter() - start
+ tool_name=tool_name,
+ execution_time=perf_counter() - start,
+ processed_by=operator_output.processed_by,
+ token_usage=operator_output.token_usage,
  )
  tool_output = ToolOutput(
  result=operator_output.result,
@@ -377,7 +403,10 @@
  )

  metadata = ToolOutputMetadata(
- tool_name=tool_name, execution_time=perf_counter() - start
+ tool_name=tool_name,
+ execution_time=perf_counter() - start,
+ processed_by=operator_output.processed_by,
+ token_usage=operator_output.token_usage,
  )
  tool_output = ToolOutput(
  result=operator_output.result,
@@ -458,7 +487,10 @@
  )

  metadata = ToolOutputMetadata(
- tool_name=tool_name, execution_time=perf_counter() - start
+ tool_name=tool_name,
+ execution_time=perf_counter() - start,
+ processed_by=operator_output.processed_by,
+ token_usage=operator_output.token_usage,
  )
  tool_output = ToolOutput(
  result=operator_output.result,
@@ -537,7 +569,10 @@
  )

  metadata = ToolOutputMetadata(
- tool_name=tool_name, execution_time=perf_counter() - start
+ tool_name=tool_name,
+ execution_time=perf_counter() - start,
+ processed_by=operator_output.processed_by,
+ token_usage=operator_output.token_usage,
  )
  tool_output = ToolOutput(
  result=operator_output.result,
@@ -615,7 +650,10 @@
  )

  metadata = ToolOutputMetadata(
- tool_name=tool_name, execution_time=perf_counter() - start
+ tool_name=tool_name,
+ execution_time=perf_counter() - start,
+ processed_by=operator_output.processed_by,
+ token_usage=operator_output.token_usage,
  )
  tool_output = ToolOutput(
  result=operator_output.result,
@@ -691,7 +729,10 @@
  )

  metadata = ToolOutputMetadata(
- tool_name=tool_name, execution_time=perf_counter() - start
+ tool_name=tool_name,
+ execution_time=perf_counter() - start,
+ processed_by=operator_output.processed_by,
+ token_usage=operator_output.token_usage,
  )
  tool_output = ToolOutput(
  result=operator_output.result,
@@ -757,6 +798,7 @@
  translation = ""
  analysis = ""
  logprobs_list = []
+ token_usage = TokenUsage()

  for chunk in chunks:
  chunk_operator_output = self._operator.run(
@@ -784,9 +826,13 @@
  analysis += chunk_operator_output.analysis
  if logprobs:
  logprobs_list.extend(chunk_operator_output.logprobs)
+ token_usage += chunk_operator_output.token_usage

  metadata = ToolOutputMetadata(
- tool_name=tool_name, execution_time=perf_counter() - start
+ tool_name=tool_name,
+ execution_time=perf_counter() - start,
+ processed_by=chunk_operator_output.processed_by,
+ token_usage=token_usage,
  )
  tool_output = ToolOutput(
  result=translation,
@@ -816,7 +862,10 @@
  )

  metadata = ToolOutputMetadata(
- tool_name=tool_name, execution_time=perf_counter() - start
+ tool_name=tool_name,
+ execution_time=perf_counter() - start,
+ processed_by=operator_output.processed_by,
+ token_usage=operator_output.token_usage,
  )
  tool_output = ToolOutput(
  result=operator_output.result,
@@ -894,7 +943,10 @@
  )

  metadata = ToolOutputMetadata(
- tool_name=tool_name, execution_time=perf_counter() - start
+ tool_name=tool_name,
+ execution_time=perf_counter() - start,
+ processed_by=operator_output.processed_by,
+ token_usage=operator_output.token_usage,
  )
  tool_output = ToolOutput(
  result=operator_output.result,
@@ -975,7 +1027,10 @@
  )

  metadata = ToolOutputMetadata(
- tool_name=tool_name, execution_time=perf_counter() - start
+ tool_name=tool_name,
+ execution_time=perf_counter() - start,
+ processed_by=operator_output.processed_by,
+ token_usage=operator_output.token_usage,
  )
  tool_output = ToolOutput(
  result=operator_output.result,
@@ -1055,7 +1110,10 @@
  )

  metadata = ToolOutputMetadata(
- tool_name=tool_name, execution_time=perf_counter() - start
+ tool_name=tool_name,
+ execution_time=perf_counter() - start,
+ processed_by=operator_output.processed_by,
+ token_usage=operator_output.token_usage,
  )
  tool_output = ToolOutput(
  result=operator_output.result,
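The synchronous tool follows the same pattern with a plain OpenAI client. A sketch with a hypothetical method call (the method name and arguments are assumptions based on the shipped prompt templates, not a documented signature):

```python
from openai import OpenAI
from texttools import TheTool

client = OpenAI(api_key="...")                      # placeholder credentials
tool = TheTool(client=client, model="gpt-4o-mini")  # example model name

output = tool.extract_keywords("TextTools adds token accounting in 2.3.1.")  # hypothetical method
if output.is_successful():
    print(output.result)
    print(output.to_dict())  # includes the new processed_by and token_usage metadata
```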
--- hamtaa_texttools-2.2.0/texttools/__init__.py
+++ /dev/null
@@ -1,6 +0,0 @@
- from .models import CategoryTree
- from .tools.async_tools import AsyncTheTool
- from .tools.batch_tools import BatchTheTool
- from .tools.sync_tools import TheTool
-
- __all__ = ["CategoryTree", "AsyncTheTool", "TheTool", "BatchTheTool"]