hamtaa-texttools 2.2.0__tar.gz → 2.3.0__tar.gz

This diff shows the contents of two publicly released versions of the package, as published to their public registry. It is provided for informational purposes only and reflects the changes between those released versions.
Files changed (43)
  1. {hamtaa_texttools-2.2.0 → hamtaa_texttools-2.3.0}/PKG-INFO +15 -3
  2. {hamtaa_texttools-2.2.0 → hamtaa_texttools-2.3.0}/README.md +13 -2
  3. {hamtaa_texttools-2.2.0 → hamtaa_texttools-2.3.0}/hamtaa_texttools.egg-info/PKG-INFO +15 -3
  4. {hamtaa_texttools-2.2.0 → hamtaa_texttools-2.3.0}/hamtaa_texttools.egg-info/requires.txt +1 -0
  5. {hamtaa_texttools-2.2.0 → hamtaa_texttools-2.3.0}/pyproject.toml +2 -1
  6. hamtaa_texttools-2.3.0/texttools/__init__.py +4 -0
  7. hamtaa_texttools-2.3.0/texttools/core/__init__.py +34 -0
  8. hamtaa_texttools-2.3.0/texttools/core/internal_models.py +123 -0
  9. hamtaa_texttools-2.3.0/texttools/core/operators/__init__.py +4 -0
  10. {hamtaa_texttools-2.2.0 → hamtaa_texttools-2.3.0}/texttools/core/operators/async_operator.py +11 -3
  11. {hamtaa_texttools-2.2.0 → hamtaa_texttools-2.3.0}/texttools/core/operators/sync_operator.py +9 -3
  12. {hamtaa_texttools-2.2.0 → hamtaa_texttools-2.3.0}/texttools/core/utils.py +33 -0
  13. {hamtaa_texttools-2.2.0 → hamtaa_texttools-2.3.0}/texttools/models.py +4 -0
  14. {hamtaa_texttools-2.2.0 → hamtaa_texttools-2.3.0}/texttools/prompts/to_question.yaml +0 -2
  15. {hamtaa_texttools-2.2.0 → hamtaa_texttools-2.3.0}/texttools/prompts/translate.yaml +2 -2
  16. hamtaa_texttools-2.3.0/texttools/tools/__init__.py +5 -0
  17. {hamtaa_texttools-2.2.0 → hamtaa_texttools-2.3.0}/texttools/tools/async_tools.py +69 -18
  18. {hamtaa_texttools-2.2.0 → hamtaa_texttools-2.3.0}/texttools/tools/sync_tools.py +69 -18
  19. hamtaa_texttools-2.2.0/texttools/__init__.py +0 -6
  20. hamtaa_texttools-2.2.0/texttools/core/__init__.py +0 -0
  21. hamtaa_texttools-2.2.0/texttools/core/internal_models.py +0 -71
  22. hamtaa_texttools-2.2.0/texttools/core/operators/__init__.py +0 -0
  23. hamtaa_texttools-2.2.0/texttools/tools/__init__.py +0 -0
  24. {hamtaa_texttools-2.2.0 → hamtaa_texttools-2.3.0}/LICENSE +0 -0
  25. {hamtaa_texttools-2.2.0 → hamtaa_texttools-2.3.0}/hamtaa_texttools.egg-info/SOURCES.txt +0 -0
  26. {hamtaa_texttools-2.2.0 → hamtaa_texttools-2.3.0}/hamtaa_texttools.egg-info/dependency_links.txt +0 -0
  27. {hamtaa_texttools-2.2.0 → hamtaa_texttools-2.3.0}/hamtaa_texttools.egg-info/top_level.txt +0 -0
  28. {hamtaa_texttools-2.2.0 → hamtaa_texttools-2.3.0}/setup.cfg +0 -0
  29. {hamtaa_texttools-2.2.0 → hamtaa_texttools-2.3.0}/tests/test_category_tree.py +0 -0
  30. {hamtaa_texttools-2.2.0 → hamtaa_texttools-2.3.0}/tests/test_to_chunks.py +0 -0
  31. {hamtaa_texttools-2.2.0 → hamtaa_texttools-2.3.0}/texttools/core/exceptions.py +0 -0
  32. {hamtaa_texttools-2.2.0 → hamtaa_texttools-2.3.0}/texttools/prompts/augment.yaml +0 -0
  33. {hamtaa_texttools-2.2.0 → hamtaa_texttools-2.3.0}/texttools/prompts/categorize.yaml +0 -0
  34. {hamtaa_texttools-2.2.0 → hamtaa_texttools-2.3.0}/texttools/prompts/extract_entities.yaml +0 -0
  35. {hamtaa_texttools-2.2.0 → hamtaa_texttools-2.3.0}/texttools/prompts/extract_keywords.yaml +0 -0
  36. {hamtaa_texttools-2.2.0 → hamtaa_texttools-2.3.0}/texttools/prompts/is_fact.yaml +0 -0
  37. {hamtaa_texttools-2.2.0 → hamtaa_texttools-2.3.0}/texttools/prompts/is_question.yaml +0 -0
  38. {hamtaa_texttools-2.2.0 → hamtaa_texttools-2.3.0}/texttools/prompts/merge_questions.yaml +0 -0
  39. {hamtaa_texttools-2.2.0 → hamtaa_texttools-2.3.0}/texttools/prompts/propositionize.yaml +0 -0
  40. {hamtaa_texttools-2.2.0 → hamtaa_texttools-2.3.0}/texttools/prompts/run_custom.yaml +0 -0
  41. {hamtaa_texttools-2.2.0 → hamtaa_texttools-2.3.0}/texttools/prompts/summarize.yaml +0 -0
  42. {hamtaa_texttools-2.2.0 → hamtaa_texttools-2.3.0}/texttools/py.typed +0 -0
  43. {hamtaa_texttools-2.2.0 → hamtaa_texttools-2.3.0}/texttools/tools/batch_tools.py +0 -0
{hamtaa_texttools-2.2.0 → hamtaa_texttools-2.3.0}/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: hamtaa-texttools
-Version: 2.2.0
+Version: 2.3.0
 Summary: A high-level NLP toolkit built on top of modern LLMs.
 Author-email: Tohidi <the.mohammad.tohidi@gmail.com>, Erfan Moosavi <erfanmoosavi84@gmail.com>, Montazer <montazerh82@gmail.com>, Givechi <mohamad.m.givechi@gmail.com>, Zareshahi <a.zareshahi1377@gmail.com>
 Maintainer-email: Erfan Moosavi <erfanmoosavi84@gmail.com>, Tohidi <the.mohammad.tohidi@gmail.com>
@@ -17,6 +17,7 @@ License-File: LICENSE
 Requires-Dist: dotenv>=0.9.9
 Requires-Dist: openai>=1.97.1
 Requires-Dist: pydantic>=2.0.0
+Requires-Dist: pytest>=9.0.2
 Requires-Dist: pyyaml>=6.0
 Dynamic: license-file
 
@@ -108,20 +109,31 @@ pip install -U hamtaa-texttools
 ## 🧩 ToolOutput
 
 Every tool of `TextTools` returns a `ToolOutput` object which is a BaseModel with attributes:
+
 - **`result: Any`**
 - **`analysis: str`**
 - **`logprobs: list`**
 - **`errors: list[str]`**
-- **`ToolOutputMetadata`**
+- **`ToolOutputMetadata`**
     - **`tool_name: str`**
+    - **`processed_by: str`**
     - **`processed_at: datetime`**
     - **`execution_time: float`**
+    - **`token_usage: TokenUsage`**
+        - **`completion_usage: CompletionUsage`**
+            - **`prompt_tokens: int`**
+            - **`completion_tokens: int`**
+            - **`total_tokens: int`**
+        - **`analyze_usage: AnalyzeUsage`**
+            - **`prompt_tokens: int`**
+            - **`completion_tokens: int`**
+            - **`total_tokens: int`**
 
 - Serialize output to JSON using the `to_json()` method.
 - Verify operation success with the `is_successful()` method.
 - Convert output to a dictionary with the `to_dict()` method.
 
-**Note:** For BatchTheTool: Each method returns a list[ToolOutput] containing results for all input texts.
+**Note:** For BatchTheTool: Each method returns a `list[ToolOutput]` containing results for all input texts.
 
 ---
 
{hamtaa_texttools-2.2.0 → hamtaa_texttools-2.3.0}/README.md
@@ -86,20 +86,31 @@ pip install -U hamtaa-texttools
 ## 🧩 ToolOutput
 
 Every tool of `TextTools` returns a `ToolOutput` object which is a BaseModel with attributes:
+
 - **`result: Any`**
 - **`analysis: str`**
 - **`logprobs: list`**
 - **`errors: list[str]`**
-- **`ToolOutputMetadata`**
+- **`ToolOutputMetadata`**
     - **`tool_name: str`**
+    - **`processed_by: str`**
    - **`processed_at: datetime`**
     - **`execution_time: float`**
+    - **`token_usage: TokenUsage`**
+        - **`completion_usage: CompletionUsage`**
+            - **`prompt_tokens: int`**
+            - **`completion_tokens: int`**
+            - **`total_tokens: int`**
+        - **`analyze_usage: AnalyzeUsage`**
+            - **`prompt_tokens: int`**
+            - **`completion_tokens: int`**
+            - **`total_tokens: int`**
 
 - Serialize output to JSON using the `to_json()` method.
 - Verify operation success with the `is_successful()` method.
 - Convert output to a dictionary with the `to_dict()` method.
 
-**Note:** For BatchTheTool: Each method returns a list[ToolOutput] containing results for all input texts.
+**Note:** For BatchTheTool: Each method returns a `list[ToolOutput]` containing results for all input texts.
 
 ---
 
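For orientation, here is a minimal sketch of how the new metadata surfaces to callers. It assumes `TheTool(client=..., model=...)` as suggested by the `sync_tools.py` diff further down; the method name `summarize` is inferred from the bundled `summarize.yaml` prompt, and the `metadata` field name on `ToolOutput` is an assumption:

```python
from openai import OpenAI

from texttools import TheTool

# Hypothetical client/model; constructor arguments are taken from the diff below.
tool = TheTool(client=OpenAI(), model="gpt-4o-mini")

output = tool.summarize("TextTools is a high-level NLP toolkit built on top of modern LLMs.")

if output.is_successful():
    print(output.result)
    # New in 2.3.0: which model produced the result and what it cost in tokens.
    print(output.metadata.processed_by)       # e.g. "gpt-4o-mini"
    if output.metadata.token_usage:           # Optional: None when not populated
        print(output.metadata.token_usage.total_tokens)

print(output.to_json())
```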
{hamtaa_texttools-2.2.0 → hamtaa_texttools-2.3.0}/hamtaa_texttools.egg-info/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: hamtaa-texttools
-Version: 2.2.0
+Version: 2.3.0
 Summary: A high-level NLP toolkit built on top of modern LLMs.
 Author-email: Tohidi <the.mohammad.tohidi@gmail.com>, Erfan Moosavi <erfanmoosavi84@gmail.com>, Montazer <montazerh82@gmail.com>, Givechi <mohamad.m.givechi@gmail.com>, Zareshahi <a.zareshahi1377@gmail.com>
 Maintainer-email: Erfan Moosavi <erfanmoosavi84@gmail.com>, Tohidi <the.mohammad.tohidi@gmail.com>
@@ -17,6 +17,7 @@ License-File: LICENSE
 Requires-Dist: dotenv>=0.9.9
 Requires-Dist: openai>=1.97.1
 Requires-Dist: pydantic>=2.0.0
+Requires-Dist: pytest>=9.0.2
 Requires-Dist: pyyaml>=6.0
 Dynamic: license-file
 
@@ -108,20 +109,31 @@ pip install -U hamtaa-texttools
 ## 🧩 ToolOutput
 
 Every tool of `TextTools` returns a `ToolOutput` object which is a BaseModel with attributes:
+
 - **`result: Any`**
 - **`analysis: str`**
 - **`logprobs: list`**
 - **`errors: list[str]`**
-- **`ToolOutputMetadata`**
+- **`ToolOutputMetadata`**
     - **`tool_name: str`**
+    - **`processed_by: str`**
     - **`processed_at: datetime`**
     - **`execution_time: float`**
+    - **`token_usage: TokenUsage`**
+        - **`completion_usage: CompletionUsage`**
+            - **`prompt_tokens: int`**
+            - **`completion_tokens: int`**
+            - **`total_tokens: int`**
+        - **`analyze_usage: AnalyzeUsage`**
+            - **`prompt_tokens: int`**
+            - **`completion_tokens: int`**
+            - **`total_tokens: int`**
 
 - Serialize output to JSON using the `to_json()` method.
 - Verify operation success with the `is_successful()` method.
 - Convert output to a dictionary with the `to_dict()` method.
 
-**Note:** For BatchTheTool: Each method returns a list[ToolOutput] containing results for all input texts.
+**Note:** For BatchTheTool: Each method returns a `list[ToolOutput]` containing results for all input texts.
 
 ---
 
{hamtaa_texttools-2.2.0 → hamtaa_texttools-2.3.0}/hamtaa_texttools.egg-info/requires.txt
@@ -1,4 +1,5 @@
 dotenv>=0.9.9
 openai>=1.97.1
 pydantic>=2.0.0
+pytest>=9.0.2
 pyyaml>=6.0
{hamtaa_texttools-2.2.0 → hamtaa_texttools-2.3.0}/pyproject.toml
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
 
 [project]
 name = "hamtaa-texttools"
-version = "2.2.0"
+version = "2.3.0"
 authors = [
     {name = "Tohidi", email = "the.mohammad.tohidi@gmail.com"},
     {name = "Erfan Moosavi", email = "erfanmoosavi84@gmail.com"},
@@ -24,6 +24,7 @@ dependencies = [
     "dotenv>=0.9.9",
     "openai>=1.97.1",
     "pydantic>=2.0.0",
+    "pytest>=9.0.2",
     "pyyaml>=6.0",
 ]
 keywords = ["nlp", "llm", "text-processing", "openai"]
hamtaa_texttools-2.3.0/texttools/__init__.py
@@ -0,0 +1,4 @@
+from .models import CategoryTree
+from .tools import AsyncTheTool, BatchTheTool, TheTool
+
+__all__ = ["CategoryTree", "AsyncTheTool", "BatchTheTool", "TheTool"]
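With this new package root (and the `tools` and `core` packages below re-exporting their members), the public classes import directly from `texttools`; a quick sketch of the aliasing this produces:

```python
# Both import paths resolve to the same class object after this change.
from texttools import TheTool
from texttools.tools import TheTool as TheToolDirect

assert TheTool is TheToolDirect
```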
hamtaa_texttools-2.3.0/texttools/core/__init__.py
@@ -0,0 +1,34 @@
+from .exceptions import LLMError, PromptError, TextToolsError, ValidationError
+from .internal_models import (
+    Bool,
+    ListDictStrStr,
+    ListStr,
+    ReasonListStr,
+    Str,
+    TokenUsage,
+    create_dynamic_model,
+)
+from .operators import AsyncOperator, Operator
+from .utils import OperatorUtils, TheToolUtils
+
+__all__ = [
+    # Exceptions
+    "LLMError",
+    "PromptError",
+    "TextToolsError",
+    "ValidationError",
+    # Internal models
+    "Bool",
+    "ListDictStrStr",
+    "ListStr",
+    "ReasonListStr",
+    "Str",
+    "TokenUsage",
+    "create_dynamic_model",
+    # Operators
+    "AsyncOperator",
+    "Operator",
+    # Utils
+    "OperatorUtils",
+    "TheToolUtils",
+]
hamtaa_texttools-2.3.0/texttools/core/internal_models.py
@@ -0,0 +1,123 @@
+from __future__ import annotations
+
+from typing import Any, Literal
+
+from pydantic import BaseModel, Field, create_model
+
+
+class CompletionUsage(BaseModel):
+    prompt_tokens: int = 0
+    completion_tokens: int = 0
+    total_tokens: int = 0
+
+
+class AnalyzeUsage(BaseModel):
+    prompt_tokens: int = 0
+    completion_tokens: int = 0
+    total_tokens: int = 0
+
+
+class TokenUsage(BaseModel):
+    completion_usage: CompletionUsage = CompletionUsage()
+    analyze_usage: AnalyzeUsage = AnalyzeUsage()
+    total_tokens: int = 0
+
+    def __add__(self, other: TokenUsage) -> TokenUsage:
+        new_completion_usage = CompletionUsage(
+            prompt_tokens=self.completion_usage.prompt_tokens
+            + other.completion_usage.prompt_tokens,
+            completion_tokens=self.completion_usage.completion_tokens
+            + other.completion_usage.completion_tokens,
+            total_tokens=self.completion_usage.total_tokens
+            + other.completion_usage.total_tokens,
+        )
+        new_analyze_usage = AnalyzeUsage(
+            prompt_tokens=self.analyze_usage.prompt_tokens
+            + other.analyze_usage.prompt_tokens,
+            completion_tokens=self.analyze_usage.completion_tokens
+            + other.analyze_usage.completion_tokens,
+            total_tokens=self.analyze_usage.total_tokens
+            + other.analyze_usage.total_tokens,
+        )
+        total_tokens = (
+            new_completion_usage.total_tokens + new_analyze_usage.total_tokens
+        )
+
+        return TokenUsage(
+            completion_usage=new_completion_usage,
+            analyze_usage=new_analyze_usage,
+            total_tokens=total_tokens,
+        )
+
+
+class OperatorOutput(BaseModel):
+    result: Any
+    analysis: str | None
+    logprobs: list[dict[str, Any]] | None
+    token_usage: TokenUsage | None = None
+    prompt_tokens: int | None = None
+    completion_tokens: int | None = None
+    analysis_tokens: int | None = None
+    total_tokens: int | None = None
+
+
+class Str(BaseModel):
+    result: str = Field(
+        ..., description="The output string", json_schema_extra={"example": "text"}
+    )
+
+
+class Bool(BaseModel):
+    result: bool = Field(
+        ...,
+        description="Boolean indicating the output state",
+        json_schema_extra={"example": True},
+    )
+
+
+class ListStr(BaseModel):
+    result: list[str] = Field(
+        ...,
+        description="The output list of strings",
+        json_schema_extra={"example": ["text_1", "text_2", "text_3"]},
+    )
+
+
+class ListDictStrStr(BaseModel):
+    result: list[dict[str, str]] = Field(
+        ...,
+        description="List of dictionaries containing string key-value pairs",
+        json_schema_extra={
+            "example": [
+                {"text": "Mohammad", "type": "PER"},
+                {"text": "Iran", "type": "LOC"},
+            ]
+        },
+    )
+
+
+class ReasonListStr(BaseModel):
+    reason: str = Field(..., description="Thinking process that led to the output")
+    result: list[str] = Field(
+        ...,
+        description="The output list of strings",
+        json_schema_extra={"example": ["text_1", "text_2", "text_3"]},
+    )
+
+
+# Create CategorizerOutput with dynamic categories
+def create_dynamic_model(allowed_values: list[str]) -> type[BaseModel]:
+    literal_type = Literal[*allowed_values]
+
+    CategorizerOutput = create_model(
+        "CategorizerOutput",
+        reason=(
+            str,
+            Field(
+                ..., description="Explanation of why the input belongs to the category"
+            ),
+        ),
+        result=(literal_type, Field(..., description="Predicted category label")),
+    )
+
+    return CategorizerOutput
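Two details of this new module are worth illustrating. `TokenUsage.__add__` is what lets the tools further down accumulate usage across translation chunks and category-tree levels (`token_usage += ...`), and `create_dynamic_model` builds a Pydantic model whose `result` field is constrained to the caller's labels. A small sketch of both, using only the definitions above:

```python
from texttools.core.internal_models import (
    CompletionUsage,
    TokenUsage,
    create_dynamic_model,
)

a = TokenUsage(
    completion_usage=CompletionUsage(prompt_tokens=10, completion_tokens=5, total_tokens=15),
    total_tokens=15,
)
b = TokenUsage(
    completion_usage=CompletionUsage(prompt_tokens=20, completion_tokens=10, total_tokens=30),
    total_tokens=30,
)

merged = a + b
assert merged.completion_usage.prompt_tokens == 30
assert merged.total_tokens == 45  # recomputed from both sub-usages

# `result` only accepts one of the given labels; anything else fails validation.
CategorizerOutput = create_dynamic_model(["sports", "politics"])
CategorizerOutput(reason="mentions a football match", result="sports")  # ok
# CategorizerOutput(reason="...", result="weather") would raise a ValidationError
```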
hamtaa_texttools-2.3.0/texttools/core/operators/__init__.py
@@ -0,0 +1,4 @@
+from .async_operator import AsyncOperator
+from .sync_operator import Operator
+
+__all__ = ["AsyncOperator", "Operator"]
{hamtaa_texttools-2.2.0 → hamtaa_texttools-2.3.0}/texttools/core/operators/async_operator.py
@@ -18,7 +18,9 @@ class AsyncOperator:
         self._client = client
         self._model = model
 
-    async def _analyze_completion(self, analyze_message: list[dict[str, str]]) -> str:
+    async def _analyze_completion(
+        self, analyze_message: list[dict[str, str]]
+    ) -> tuple[str, Any]:
         try:
             completion = await self._client.chat.completions.create(
                 model=self._model,
@@ -33,7 +35,7 @@ class AsyncOperator:
             if not analysis:
                 raise LLMError("Empty analysis response")
 
-            return analysis
+            return analysis, completion
 
         except Exception as e:
             if isinstance(e, (PromptError, LLMError)):
@@ -116,12 +118,15 @@
         )
 
         analysis: str | None = None
+        analyze_completion: Any = None
 
         if with_analysis:
             analyze_message = OperatorUtils.build_message(
                 prompt_configs["analyze_template"]
             )
-            analysis = await self._analyze_completion(analyze_message)
+            analysis, analyze_completion = await self._analyze_completion(
+                analyze_message
+            )
 
         main_prompt = OperatorUtils.build_main_prompt(
             prompt_configs["main_template"], analysis, output_lang, user_prompt
@@ -176,6 +181,9 @@
             logprobs=OperatorUtils.extract_logprobs(completion)
             if logprobs
             else None,
+            token_usage=OperatorUtils.extract_token_usage(
+                completion, analyze_completion
+            ),
         )
 
         return operator_output
{hamtaa_texttools-2.2.0 → hamtaa_texttools-2.3.0}/texttools/core/operators/sync_operator.py
@@ -18,7 +18,9 @@ class Operator:
         self._client = client
         self._model = model
 
-    def _analyze_completion(self, analyze_message: list[dict[str, str]]) -> str:
+    def _analyze_completion(
+        self, analyze_message: list[dict[str, str]]
+    ) -> tuple[str, Any]:
         try:
             completion = self._client.chat.completions.create(
                 model=self._model,
@@ -33,7 +35,7 @@ class Operator:
             if not analysis:
                 raise LLMError("Empty analysis response")
 
-            return analysis
+            return analysis, completion
 
         except Exception as e:
             if isinstance(e, (PromptError, LLMError)):
@@ -114,12 +116,13 @@
         )
 
         analysis: str | None = None
+        analyze_completion: Any = None
 
         if with_analysis:
             analyze_message = OperatorUtils.build_message(
                 prompt_configs["analyze_template"]
             )
-            analysis = self._analyze_completion(analyze_message)
+            analysis, analyze_completion = self._analyze_completion(analyze_message)
 
         main_prompt = OperatorUtils.build_main_prompt(
             prompt_configs["main_template"], analysis, output_lang, user_prompt
@@ -174,6 +177,9 @@
             logprobs=OperatorUtils.extract_logprobs(completion)
             if logprobs
             else None,
+            token_usage=OperatorUtils.extract_token_usage(
+                completion, analyze_completion
+            ),
         )
 
         return operator_output
{hamtaa_texttools-2.2.0 → hamtaa_texttools-2.3.0}/texttools/core/utils.py
@@ -9,6 +9,7 @@ from typing import Any
 import yaml
 
 from .exceptions import PromptError
+from .internal_models import AnalyzeUsage, CompletionUsage, TokenUsage
 
 
 class OperatorUtils:
@@ -148,6 +149,38 @@ class OperatorUtils:
         new_temp = base_temp + random.choice([-1, 1]) * random.uniform(0.1, 0.9)
         return max(0.0, min(new_temp, 1.5))
 
+    @staticmethod
+    def extract_token_usage(completion: Any, analyze_completion: Any) -> TokenUsage:
+        completion_usage = completion.usage
+        analyze_usage = analyze_completion.usage if analyze_completion else None
+
+        completion_usage_model = CompletionUsage(
+            prompt_tokens=getattr(completion_usage, "prompt_tokens", 00),
+            completion_tokens=getattr(completion_usage, "completion_tokens", 00),
+            total_tokens=getattr(completion_usage, "total_tokens", 00),
+        )
+        analyze_usage_model = AnalyzeUsage(
+            prompt_tokens=getattr(analyze_usage, "prompt_tokens", 0),
+            completion_tokens=getattr(analyze_usage, "completion_tokens", 0),
+            total_tokens=getattr(analyze_usage, "total_tokens", 0),
+        )
+        total_analyze_tokens = (
+            analyze_usage_model.prompt_tokens + analyze_usage_model.completion_tokens
+            if analyze_completion
+            else 0
+        )
+        total_tokens = (
+            completion_usage_model.prompt_tokens
+            + completion_usage_model.completion_tokens
+            + total_analyze_tokens
+        )
+
+        return TokenUsage(
+            completion_usage=completion_usage_model,
+            analyze_usage=analyze_usage_model,
+            total_tokens=total_tokens,
+        )
+
 
 class TheToolUtils:
     """
{hamtaa_texttools-2.2.0 → hamtaa_texttools-2.3.0}/texttools/models.py
@@ -5,11 +5,15 @@ from typing import Any
 
 from pydantic import BaseModel, Field
 
+from .core import TokenUsage
+
 
 class ToolOutputMetadata(BaseModel):
     tool_name: str
+    processed_by: str | None = None
     processed_at: datetime = Field(default_factory=datetime.now)
     execution_time: float | None = None
+    token_usage: TokenUsage | None = None
 
 
 class ToolOutput(BaseModel):
{hamtaa_texttools-2.2.0 → hamtaa_texttools-2.3.0}/texttools/prompts/to_question.yaml
@@ -7,7 +7,6 @@ main_template:
     and must not mention any verbs like this, that, he or she in the question.
 
     There is a `reason` key, fill that up with a summerized version of your thoughts.
-    The `reason` must be less than 20 words.
    Don't forget to fill the reason.
 
     Respond only in JSON format:
@@ -23,7 +22,6 @@
     and must not mention any verbs like this, that, he or she in the question.
 
     There is a `reason` key, fill that up with a summerized version of your thoughts.
-    The `reason` must be less than 20 words.
     Don't forget to fill the reason.
 
     Respond only in JSON format:
{hamtaa_texttools-2.2.0 → hamtaa_texttools-2.3.0}/texttools/prompts/translate.yaml
@@ -3,9 +3,9 @@ main_template: |
     Output only the translated text.
 
     Respond only in JSON format:
-    {{"result": "string"}}
+    {{"result": "translated_text"}}
 
-    Don't translate proper name, only transliterate them to {target_lang}
+    Don't translate proper names, only transliterate them to {target_lang}
 
     Translate the following text to {target_lang}:
     {text}
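The doubled braces in this template are significant if, as the escaping suggests, it is rendered with Python's `str.format`: `{{...}}` survives as literal JSON braces while `{target_lang}` and `{text}` are substituted. A sketch under that assumption:

```python
template = (
    "Respond only in JSON format:\n"
    '{{"result": "translated_text"}}\n\n'
    "Translate the following text to {target_lang}:\n{text}"
)
print(template.format(target_lang="English", text="سلام دنیا"))
# The JSON braces stay literal; the two placeholders are filled in.
```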
hamtaa_texttools-2.3.0/texttools/tools/__init__.py
@@ -0,0 +1,5 @@
+from .async_tools import AsyncTheTool
+from .batch_tools import BatchTheTool
+from .sync_tools import TheTool
+
+__all__ = ["AsyncTheTool", "BatchTheTool", "TheTool"]
{hamtaa_texttools-2.2.0 → hamtaa_texttools-2.3.0}/texttools/tools/async_tools.py
@@ -5,17 +5,21 @@ from typing import Any, Literal
 
 from openai import AsyncOpenAI
 
-from ..core.exceptions import LLMError, PromptError, TextToolsError, ValidationError
-from ..core.internal_models import (
+from ..core import (
+    AsyncOperator,
     Bool,
     ListDictStrStr,
     ListStr,
+    LLMError,
+    PromptError,
     ReasonListStr,
     Str,
+    TextToolsError,
+    TheToolUtils,
+    TokenUsage,
+    ValidationError,
     create_dynamic_model,
 )
-from ..core.operators.async_operator import AsyncOperator
-from ..core.utils import TheToolUtils
 from ..models import CategoryTree, ToolOutput, ToolOutputMetadata
 
 
@@ -29,6 +33,7 @@ class AsyncTheTool:
         self._operator = AsyncOperator(client=client, model=model)
         self.logger = logging.getLogger(self.__class__.__name__)
         self.raise_on_error = raise_on_error
+        self.model = model
 
     async def categorize(
         self,
@@ -91,7 +96,10 @@
         )
 
         metadata = ToolOutputMetadata(
-            tool_name=tool_name, execution_time=perf_counter() - start
+            tool_name=tool_name,
+            execution_time=perf_counter() - start,
+            processed_by=self.model,
+            token_usage=operator_output.token_usage,
         )
         tool_output = ToolOutput(
             result=operator_output.result,
@@ -106,6 +114,7 @@
         final_categories = []
         analysis = ""
         logprobs_list = []
+        token_usage = TokenUsage()
 
         for _ in range(levels):
             if not parent_node.children:
@@ -149,9 +158,13 @@
             analysis += level_operator_output.analysis
             if logprobs:
                 logprobs_list.extend(level_operator_output.logprobs)
+            token_usage += level_operator_output.token_usage
 
         metadata = ToolOutputMetadata(
-            tool_name=tool_name, execution_time=(perf_counter() - start)
+            tool_name=tool_name,
+            execution_time=perf_counter() - start,
+            processed_by=self.model,
+            token_usage=token_usage,
         )
         tool_output = ToolOutput(
             result=final_categories,
@@ -237,7 +250,10 @@
         )
 
         metadata = ToolOutputMetadata(
-            tool_name=tool_name, execution_time=perf_counter() - start
+            tool_name=tool_name,
+            execution_time=perf_counter() - start,
+            processed_by=self.model,
+            token_usage=operator_output.token_usage,
         )
         tool_output = ToolOutput(
             result=operator_output.result,
@@ -321,7 +337,10 @@
         )
 
         metadata = ToolOutputMetadata(
-            tool_name=tool_name, execution_time=perf_counter() - start
+            tool_name=tool_name,
+            execution_time=perf_counter() - start,
+            processed_by=self.model,
+            token_usage=operator_output.token_usage,
         )
         tool_output = ToolOutput(
             result=operator_output.result,
@@ -400,7 +419,10 @@
         )
 
         metadata = ToolOutputMetadata(
-            tool_name=tool_name, execution_time=perf_counter() - start
+            tool_name=tool_name,
+            execution_time=perf_counter() - start,
+            processed_by=self.model,
+            token_usage=operator_output.token_usage,
         )
         tool_output = ToolOutput(
             result=operator_output.result,
@@ -486,7 +508,10 @@
         )
 
         metadata = ToolOutputMetadata(
-            tool_name=tool_name, execution_time=perf_counter() - start
+            tool_name=tool_name,
+            execution_time=perf_counter() - start,
+            processed_by=self.model,
+            token_usage=operator_output.token_usage,
         )
         tool_output = ToolOutput(
             result=operator_output.result,
@@ -570,7 +595,10 @@
         )
 
         metadata = ToolOutputMetadata(
-            tool_name=tool_name, execution_time=perf_counter() - start
+            tool_name=tool_name,
+            execution_time=perf_counter() - start,
+            processed_by=self.model,
+            token_usage=operator_output.token_usage,
         )
         tool_output = ToolOutput(
             result=operator_output.result,
@@ -653,7 +681,10 @@
         )
 
         metadata = ToolOutputMetadata(
-            tool_name=tool_name, execution_time=perf_counter() - start
+            tool_name=tool_name,
+            execution_time=perf_counter() - start,
+            processed_by=self.model,
+            token_usage=operator_output.token_usage,
        )
         tool_output = ToolOutput(
             result=operator_output.result,
@@ -734,7 +765,10 @@
         )
 
         metadata = ToolOutputMetadata(
-            tool_name=tool_name, execution_time=perf_counter() - start
+            tool_name=tool_name,
+            execution_time=perf_counter() - start,
+            processed_by=self.model,
+            token_usage=operator_output.token_usage,
         )
         tool_output = ToolOutput(
             result=operator_output.result,
@@ -802,6 +836,7 @@
         translation = ""
         analysis = ""
         logprobs_list = []
+        token_usage = TokenUsage()
 
         for chunk in chunks:
             chunk_operator_output = await TheToolUtils.run_with_timeout(
@@ -832,9 +867,13 @@
             analysis += chunk_operator_output.analysis
             if logprobs:
                 logprobs_list.extend(chunk_operator_output.logprobs)
+            token_usage += chunk_operator_output.token_usage
 
         metadata = ToolOutputMetadata(
-            tool_name=tool_name, execution_time=perf_counter() - start
+            tool_name=tool_name,
+            execution_time=perf_counter() - start,
+            processed_by=self.model,
+            token_usage=token_usage,
         )
         tool_output = ToolOutput(
             result=translation,
@@ -867,7 +906,10 @@
         )
 
         metadata = ToolOutputMetadata(
-            tool_name=tool_name, execution_time=perf_counter() - start
+            tool_name=tool_name,
+            execution_time=perf_counter() - start,
+            processed_by=self.model,
+            token_usage=operator_output.token_usage,
         )
         tool_output = ToolOutput(
             result=operator_output.result,
@@ -950,7 +992,10 @@
         )
 
         metadata = ToolOutputMetadata(
-            tool_name=tool_name, execution_time=perf_counter() - start
+            tool_name=tool_name,
+            execution_time=perf_counter() - start,
+            processed_by=self.model,
+            token_usage=operator_output.token_usage,
         )
         tool_output = ToolOutput(
             result=operator_output.result,
@@ -1036,7 +1081,10 @@
         )
 
         metadata = ToolOutputMetadata(
-            tool_name=tool_name, execution_time=perf_counter() - start
+            tool_name=tool_name,
+            execution_time=perf_counter() - start,
+            processed_by=self.model,
+            token_usage=operator_output.token_usage,
         )
         tool_output = ToolOutput(
             result=operator_output.result,
@@ -1121,7 +1169,10 @@
         )
 
         metadata = ToolOutputMetadata(
-            tool_name=tool_name, execution_time=perf_counter() - start
+            tool_name=tool_name,
+            execution_time=perf_counter() - start,
+            processed_by=self.model,
+            token_usage=operator_output.token_usage,
         )
         tool_output = ToolOutput(
             result=operator_output.result,
{hamtaa_texttools-2.2.0 → hamtaa_texttools-2.3.0}/texttools/tools/sync_tools.py
@@ -5,17 +5,21 @@ from typing import Any, Literal
 
 from openai import OpenAI
 
-from ..core.exceptions import LLMError, PromptError, TextToolsError, ValidationError
-from ..core.internal_models import (
+from ..core import (
     Bool,
     ListDictStrStr,
     ListStr,
+    LLMError,
+    Operator,
+    PromptError,
     ReasonListStr,
     Str,
+    TextToolsError,
+    TheToolUtils,
+    TokenUsage,
+    ValidationError,
     create_dynamic_model,
 )
-from ..core.operators.sync_operator import Operator
-from ..core.utils import TheToolUtils
 from ..models import CategoryTree, ToolOutput, ToolOutputMetadata
 
 
@@ -29,6 +33,7 @@ class TheTool:
         self._operator = Operator(client=client, model=model)
         self.logger = logging.getLogger(self.__class__.__name__)
         self.raise_on_error = raise_on_error
+        self.model = model
 
     def categorize(
         self,
@@ -86,7 +91,10 @@
         )
 
         metadata = ToolOutputMetadata(
-            tool_name=tool_name, execution_time=perf_counter() - start
+            tool_name=tool_name,
+            execution_time=perf_counter() - start,
+            processed_by=self.model,
+            token_usage=operator_output.token_usage,
         )
         tool_output = ToolOutput(
             result=operator_output.result,
@@ -101,6 +109,7 @@
         final_categories = []
         analysis = ""
         logprobs_list = []
+        token_usage = TokenUsage()
 
         for _ in range(levels):
             if not parent_node.children:
@@ -141,9 +150,13 @@
             analysis += level_operator_output.analysis
             if logprobs:
                 logprobs_list.extend(level_operator_output.logprobs)
+            token_usage += level_operator_output.token_usage
 
         metadata = ToolOutputMetadata(
-            tool_name=tool_name, execution_time=(perf_counter() - start)
+            tool_name=tool_name,
+            execution_time=perf_counter() - start,
+            processed_by=self.model,
+            token_usage=token_usage,
         )
         tool_output = ToolOutput(
             result=final_categories,
@@ -224,7 +237,10 @@
         )
 
         metadata = ToolOutputMetadata(
-            tool_name=tool_name, execution_time=perf_counter() - start
+            tool_name=tool_name,
+            execution_time=perf_counter() - start,
+            processed_by=self.model,
+            token_usage=operator_output.token_usage,
         )
         tool_output = ToolOutput(
             result=operator_output.result,
@@ -303,7 +319,10 @@
         )
 
         metadata = ToolOutputMetadata(
-            tool_name=tool_name, execution_time=perf_counter() - start
+            tool_name=tool_name,
+            execution_time=perf_counter() - start,
+            processed_by=self.model,
+            token_usage=operator_output.token_usage,
         )
         tool_output = ToolOutput(
             result=operator_output.result,
@@ -377,7 +396,10 @@
         )
 
         metadata = ToolOutputMetadata(
-            tool_name=tool_name, execution_time=perf_counter() - start
+            tool_name=tool_name,
+            execution_time=perf_counter() - start,
+            processed_by=self.model,
+            token_usage=operator_output.token_usage,
         )
         tool_output = ToolOutput(
             result=operator_output.result,
@@ -458,7 +480,10 @@
         )
 
         metadata = ToolOutputMetadata(
-            tool_name=tool_name, execution_time=perf_counter() - start
+            tool_name=tool_name,
+            execution_time=perf_counter() - start,
+            processed_by=self.model,
+            token_usage=operator_output.token_usage,
         )
         tool_output = ToolOutput(
             result=operator_output.result,
@@ -537,7 +562,10 @@
         )
 
         metadata = ToolOutputMetadata(
-            tool_name=tool_name, execution_time=perf_counter() - start
+            tool_name=tool_name,
+            execution_time=perf_counter() - start,
+            processed_by=self.model,
+            token_usage=operator_output.token_usage,
         )
         tool_output = ToolOutput(
             result=operator_output.result,
@@ -615,7 +643,10 @@
         )
 
         metadata = ToolOutputMetadata(
-            tool_name=tool_name, execution_time=perf_counter() - start
+            tool_name=tool_name,
+            execution_time=perf_counter() - start,
+            processed_by=self.model,
+            token_usage=operator_output.token_usage,
         )
         tool_output = ToolOutput(
             result=operator_output.result,
@@ -691,7 +722,10 @@
         )
 
         metadata = ToolOutputMetadata(
-            tool_name=tool_name, execution_time=perf_counter() - start
+            tool_name=tool_name,
+            execution_time=perf_counter() - start,
+            processed_by=self.model,
+            token_usage=operator_output.token_usage,
         )
         tool_output = ToolOutput(
             result=operator_output.result,
@@ -757,6 +791,7 @@
         translation = ""
         analysis = ""
         logprobs_list = []
+        token_usage = TokenUsage()
 
         for chunk in chunks:
             chunk_operator_output = self._operator.run(
@@ -784,9 +819,13 @@
             analysis += chunk_operator_output.analysis
             if logprobs:
                 logprobs_list.extend(chunk_operator_output.logprobs)
+            token_usage += chunk_operator_output.token_usage
 
         metadata = ToolOutputMetadata(
-            tool_name=tool_name, execution_time=perf_counter() - start
+            tool_name=tool_name,
+            execution_time=perf_counter() - start,
+            processed_by=self.model,
+            token_usage=token_usage,
         )
         tool_output = ToolOutput(
             result=translation,
@@ -816,7 +855,10 @@
         )
 
         metadata = ToolOutputMetadata(
-            tool_name=tool_name, execution_time=perf_counter() - start
+            tool_name=tool_name,
+            execution_time=perf_counter() - start,
+            processed_by=self.model,
+            token_usage=operator_output.token_usage,
         )
         tool_output = ToolOutput(
             result=operator_output.result,
@@ -894,7 +936,10 @@
         )
 
         metadata = ToolOutputMetadata(
-            tool_name=tool_name, execution_time=perf_counter() - start
+            tool_name=tool_name,
+            execution_time=perf_counter() - start,
+            processed_by=self.model,
+            token_usage=operator_output.token_usage,
         )
         tool_output = ToolOutput(
             result=operator_output.result,
@@ -975,7 +1020,10 @@
         )
 
         metadata = ToolOutputMetadata(
-            tool_name=tool_name, execution_time=perf_counter() - start
+            tool_name=tool_name,
+            execution_time=perf_counter() - start,
+            processed_by=self.model,
+            token_usage=operator_output.token_usage,
        )
         tool_output = ToolOutput(
             result=operator_output.result,
@@ -1055,7 +1103,10 @@
         )
 
         metadata = ToolOutputMetadata(
-            tool_name=tool_name, execution_time=perf_counter() - start
+            tool_name=tool_name,
+            execution_time=perf_counter() - start,
+            processed_by=self.model,
+            token_usage=operator_output.token_usage,
         )
         tool_output = ToolOutput(
             result=operator_output.result,
hamtaa_texttools-2.2.0/texttools/__init__.py
@@ -1,6 +0,0 @@
-from .models import CategoryTree
-from .tools.async_tools import AsyncTheTool
-from .tools.batch_tools import BatchTheTool
-from .tools.sync_tools import TheTool
-
-__all__ = ["CategoryTree", "AsyncTheTool", "TheTool", "BatchTheTool"]
hamtaa_texttools-2.2.0/texttools/core/internal_models.py
@@ -1,71 +0,0 @@
-from typing import Any, Literal
-
-from pydantic import BaseModel, Field, create_model
-
-
-class OperatorOutput(BaseModel):
-    result: Any
-    analysis: str | None
-    logprobs: list[dict[str, Any]] | None
-
-
-class Str(BaseModel):
-    result: str = Field(
-        ..., description="The output string", json_schema_extra={"example": "text"}
-    )
-
-
-class Bool(BaseModel):
-    result: bool = Field(
-        ...,
-        description="Boolean indicating the output state",
-        json_schema_extra={"example": True},
-    )
-
-
-class ListStr(BaseModel):
-    result: list[str] = Field(
-        ...,
-        description="The output list of strings",
-        json_schema_extra={"example": ["text_1", "text_2", "text_3"]},
-    )
-
-
-class ListDictStrStr(BaseModel):
-    result: list[dict[str, str]] = Field(
-        ...,
-        description="List of dictionaries containing string key-value pairs",
-        json_schema_extra={
-            "example": [
-                {"text": "Mohammad", "type": "PER"},
-                {"text": "Iran", "type": "LOC"},
-            ]
-        },
-    )
-
-
-class ReasonListStr(BaseModel):
-    reason: str = Field(..., description="Thinking process that led to the output")
-    result: list[str] = Field(
-        ...,
-        description="The output list of strings",
-        json_schema_extra={"example": ["text_1", "text_2", "text_3"]},
-    )
-
-
-# Create CategorizerOutput with dynamic categories
-def create_dynamic_model(allowed_values: list[str]) -> type[BaseModel]:
-    literal_type = Literal[*allowed_values]
-
-    CategorizerOutput = create_model(
-        "CategorizerOutput",
-        reason=(
-            str,
-            Field(
-                ..., description="Explanation of why the input belongs to the category"
-            ),
-        ),
-        result=(literal_type, Field(..., description="Predicted category label")),
-    )
-
-    return CategorizerOutput