hamtaa-texttools 2.3.0__tar.gz → 2.3.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (38)
  1. {hamtaa_texttools-2.3.0 → hamtaa_texttools-2.3.1}/PKG-INFO +2 -1
  2. {hamtaa_texttools-2.3.0 → hamtaa_texttools-2.3.1}/README.md +1 -0
  3. {hamtaa_texttools-2.3.0 → hamtaa_texttools-2.3.1}/hamtaa_texttools.egg-info/PKG-INFO +2 -1
  4. {hamtaa_texttools-2.3.0 → hamtaa_texttools-2.3.1}/pyproject.toml +1 -1
  5. {hamtaa_texttools-2.3.0 → hamtaa_texttools-2.3.1}/texttools/core/internal_models.py +1 -4
  6. {hamtaa_texttools-2.3.0 → hamtaa_texttools-2.3.1}/texttools/core/operators/async_operator.py +1 -0
  7. {hamtaa_texttools-2.3.0 → hamtaa_texttools-2.3.1}/texttools/core/operators/sync_operator.py +1 -0
  8. {hamtaa_texttools-2.3.0 → hamtaa_texttools-2.3.1}/texttools/core/utils.py +1 -2
  9. {hamtaa_texttools-2.3.0 → hamtaa_texttools-2.3.1}/texttools/tools/async_tools.py +22 -15
  10. {hamtaa_texttools-2.3.0 → hamtaa_texttools-2.3.1}/texttools/tools/batch_tools.py +9 -0
  11. {hamtaa_texttools-2.3.0 → hamtaa_texttools-2.3.1}/texttools/tools/sync_tools.py +22 -15
  12. {hamtaa_texttools-2.3.0 → hamtaa_texttools-2.3.1}/LICENSE +0 -0
  13. {hamtaa_texttools-2.3.0 → hamtaa_texttools-2.3.1}/hamtaa_texttools.egg-info/SOURCES.txt +0 -0
  14. {hamtaa_texttools-2.3.0 → hamtaa_texttools-2.3.1}/hamtaa_texttools.egg-info/dependency_links.txt +0 -0
  15. {hamtaa_texttools-2.3.0 → hamtaa_texttools-2.3.1}/hamtaa_texttools.egg-info/requires.txt +0 -0
  16. {hamtaa_texttools-2.3.0 → hamtaa_texttools-2.3.1}/hamtaa_texttools.egg-info/top_level.txt +0 -0
  17. {hamtaa_texttools-2.3.0 → hamtaa_texttools-2.3.1}/setup.cfg +0 -0
  18. {hamtaa_texttools-2.3.0 → hamtaa_texttools-2.3.1}/tests/test_category_tree.py +0 -0
  19. {hamtaa_texttools-2.3.0 → hamtaa_texttools-2.3.1}/tests/test_to_chunks.py +0 -0
  20. {hamtaa_texttools-2.3.0 → hamtaa_texttools-2.3.1}/texttools/__init__.py +0 -0
  21. {hamtaa_texttools-2.3.0 → hamtaa_texttools-2.3.1}/texttools/core/__init__.py +0 -0
  22. {hamtaa_texttools-2.3.0 → hamtaa_texttools-2.3.1}/texttools/core/exceptions.py +0 -0
  23. {hamtaa_texttools-2.3.0 → hamtaa_texttools-2.3.1}/texttools/core/operators/__init__.py +0 -0
  24. {hamtaa_texttools-2.3.0 → hamtaa_texttools-2.3.1}/texttools/models.py +0 -0
  25. {hamtaa_texttools-2.3.0 → hamtaa_texttools-2.3.1}/texttools/prompts/augment.yaml +0 -0
  26. {hamtaa_texttools-2.3.0 → hamtaa_texttools-2.3.1}/texttools/prompts/categorize.yaml +0 -0
  27. {hamtaa_texttools-2.3.0 → hamtaa_texttools-2.3.1}/texttools/prompts/extract_entities.yaml +0 -0
  28. {hamtaa_texttools-2.3.0 → hamtaa_texttools-2.3.1}/texttools/prompts/extract_keywords.yaml +0 -0
  29. {hamtaa_texttools-2.3.0 → hamtaa_texttools-2.3.1}/texttools/prompts/is_fact.yaml +0 -0
  30. {hamtaa_texttools-2.3.0 → hamtaa_texttools-2.3.1}/texttools/prompts/is_question.yaml +0 -0
  31. {hamtaa_texttools-2.3.0 → hamtaa_texttools-2.3.1}/texttools/prompts/merge_questions.yaml +0 -0
  32. {hamtaa_texttools-2.3.0 → hamtaa_texttools-2.3.1}/texttools/prompts/propositionize.yaml +0 -0
  33. {hamtaa_texttools-2.3.0 → hamtaa_texttools-2.3.1}/texttools/prompts/run_custom.yaml +0 -0
  34. {hamtaa_texttools-2.3.0 → hamtaa_texttools-2.3.1}/texttools/prompts/summarize.yaml +0 -0
  35. {hamtaa_texttools-2.3.0 → hamtaa_texttools-2.3.1}/texttools/prompts/to_question.yaml +0 -0
  36. {hamtaa_texttools-2.3.0 → hamtaa_texttools-2.3.1}/texttools/prompts/translate.yaml +0 -0
  37. {hamtaa_texttools-2.3.0 → hamtaa_texttools-2.3.1}/texttools/py.typed +0 -0
  38. {hamtaa_texttools-2.3.0 → hamtaa_texttools-2.3.1}/texttools/tools/__init__.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: hamtaa-texttools
3
- Version: 2.3.0
3
+ Version: 2.3.1
4
4
  Summary: A high-level NLP toolkit built on top of modern LLMs.
5
5
  Author-email: Tohidi <the.mohammad.tohidi@gmail.com>, Erfan Moosavi <erfanmoosavi84@gmail.com>, Montazer <montazerh82@gmail.com>, Givechi <mohamad.m.givechi@gmail.com>, Zareshahi <a.zareshahi1377@gmail.com>
6
6
  Maintainer-email: Erfan Moosavi <erfanmoosavi84@gmail.com>, Tohidi <the.mohammad.tohidi@gmail.com>
@@ -128,6 +128,7 @@ Every tool of `TextTools` returns a `ToolOutput` object which is a BaseModel wit
128
128
  - **`prompt_tokens: int`**
129
129
  - **`completion_tokens: int`**
130
130
  - **`total_tokens: int`**
131
+ - **`total_tokens: int`**
131
132
 
132
133
  - Serialize output to JSON using the `to_json()` method.
133
134
  - Verify operation success with the `is_successful()` method.
@@ -105,6 +105,7 @@ Every tool of `TextTools` returns a `ToolOutput` object which is a BaseModel wit
105
105
  - **`prompt_tokens: int`**
106
106
  - **`completion_tokens: int`**
107
107
  - **`total_tokens: int`**
108
+ - **`total_tokens: int`**
108
109
 
109
110
  - Serialize output to JSON using the `to_json()` method.
110
111
  - Verify operation success with the `is_successful()` method.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: hamtaa-texttools
3
- Version: 2.3.0
3
+ Version: 2.3.1
4
4
  Summary: A high-level NLP toolkit built on top of modern LLMs.
5
5
  Author-email: Tohidi <the.mohammad.tohidi@gmail.com>, Erfan Moosavi <erfanmoosavi84@gmail.com>, Montazer <montazerh82@gmail.com>, Givechi <mohamad.m.givechi@gmail.com>, Zareshahi <a.zareshahi1377@gmail.com>
6
6
  Maintainer-email: Erfan Moosavi <erfanmoosavi84@gmail.com>, Tohidi <the.mohammad.tohidi@gmail.com>
@@ -128,6 +128,7 @@ Every tool of `TextTools` returns a `ToolOutput` object which is a BaseModel wit
128
128
  - **`prompt_tokens: int`**
129
129
  - **`completion_tokens: int`**
130
130
  - **`total_tokens: int`**
131
+ - **`total_tokens: int`**
131
132
 
132
133
  - Serialize output to JSON using the `to_json()` method.
133
134
  - Verify operation success with the `is_successful()` method.
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "hamtaa-texttools"
7
- version = "2.3.0"
7
+ version = "2.3.1"
8
8
  authors = [
9
9
  {name = "Tohidi", email = "the.mohammad.tohidi@gmail.com"},
10
10
  {name = "Erfan Moosavi", email = "erfanmoosavi84@gmail.com"},
@@ -54,11 +54,8 @@ class OperatorOutput(BaseModel):
54
54
  result: Any
55
55
  analysis: str | None
56
56
  logprobs: list[dict[str, Any]] | None
57
+ processed_by: str
57
58
  token_usage: TokenUsage | None = None
58
- prompt_tokens: int | None = None
59
- completion_tokens: int | None = None
60
- analysis_tokens: int | None = None
61
- total_tokens: int | None = None
62
59
 
63
60
 
64
61
  class Str(BaseModel):
@@ -181,6 +181,7 @@ class AsyncOperator:
181
181
  logprobs=OperatorUtils.extract_logprobs(completion)
182
182
  if logprobs
183
183
  else None,
184
+ processed_by=self._model,
184
185
  token_usage=OperatorUtils.extract_token_usage(
185
186
  completion, analyze_completion
186
187
  ),
@@ -177,6 +177,7 @@ class Operator:
177
177
  logprobs=OperatorUtils.extract_logprobs(completion)
178
178
  if logprobs
179
179
  else None,
180
+ processed_by=self._model,
180
181
  token_usage=OperatorUtils.extract_token_usage(
181
182
  completion, analyze_completion
182
183
  ),
@@ -110,13 +110,12 @@ class OperatorUtils:
110
110
  return [{"role": "user", "content": prompt}]
111
111
 
112
112
  @staticmethod
113
- def extract_logprobs(completion: Any) -> list[dict]:
113
+ def extract_logprobs(completion: Any) -> list[dict[str, Any]]:
114
114
  """
115
115
  Extracts and filters logprobs from completion.
116
116
  Skips punctuation and structural tokens.
117
117
  """
118
118
  logprobs_data = []
119
-
120
119
  ignore_pattern = re.compile(r'^(result|[\s\[\]\{\}",:]+)$')
121
120
 
122
121
  for choice in completion.choices:
@@ -30,10 +30,17 @@ class AsyncTheTool:
30
30
  model: str,
31
31
  raise_on_error: bool = True,
32
32
  ):
33
+ """
34
+ Initialize the AsyncTheTool instance.
35
+
36
+ Args:
37
+ client: An AsyncOpenAI client instance for making asynchronous API calls
38
+ model: The name of the model
39
+ raise_on_error: If True, raises exceptions on errors; if False, logs errors and continues
40
+ """
33
41
  self._operator = AsyncOperator(client=client, model=model)
34
42
  self.logger = logging.getLogger(self.__class__.__name__)
35
43
  self.raise_on_error = raise_on_error
36
- self.model = model
37
44
 
38
45
  async def categorize(
39
46
  self,
@@ -98,7 +105,7 @@ class AsyncTheTool:
98
105
  metadata = ToolOutputMetadata(
99
106
  tool_name=tool_name,
100
107
  execution_time=perf_counter() - start,
101
- processed_by=self.model,
108
+ processed_by=operator_output.processed_by,
102
109
  token_usage=operator_output.token_usage,
103
110
  )
104
111
  tool_output = ToolOutput(
@@ -163,7 +170,7 @@ class AsyncTheTool:
163
170
  metadata = ToolOutputMetadata(
164
171
  tool_name=tool_name,
165
172
  execution_time=perf_counter() - start,
166
- processed_by=self.model,
173
+ processed_by=level_operator_output.processed_by,
167
174
  token_usage=token_usage,
168
175
  )
169
176
  tool_output = ToolOutput(
@@ -252,7 +259,7 @@ class AsyncTheTool:
252
259
  metadata = ToolOutputMetadata(
253
260
  tool_name=tool_name,
254
261
  execution_time=perf_counter() - start,
255
- processed_by=self.model,
262
+ processed_by=operator_output.processed_by,
256
263
  token_usage=operator_output.token_usage,
257
264
  )
258
265
  tool_output = ToolOutput(
@@ -339,7 +346,7 @@ class AsyncTheTool:
339
346
  metadata = ToolOutputMetadata(
340
347
  tool_name=tool_name,
341
348
  execution_time=perf_counter() - start,
342
- processed_by=self.model,
349
+ processed_by=operator_output.processed_by,
343
350
  token_usage=operator_output.token_usage,
344
351
  )
345
352
  tool_output = ToolOutput(
@@ -421,7 +428,7 @@ class AsyncTheTool:
421
428
  metadata = ToolOutputMetadata(
422
429
  tool_name=tool_name,
423
430
  execution_time=perf_counter() - start,
424
- processed_by=self.model,
431
+ processed_by=operator_output.processed_by,
425
432
  token_usage=operator_output.token_usage,
426
433
  )
427
434
  tool_output = ToolOutput(
@@ -510,7 +517,7 @@ class AsyncTheTool:
510
517
  metadata = ToolOutputMetadata(
511
518
  tool_name=tool_name,
512
519
  execution_time=perf_counter() - start,
513
- processed_by=self.model,
520
+ processed_by=operator_output.processed_by,
514
521
  token_usage=operator_output.token_usage,
515
522
  )
516
523
  tool_output = ToolOutput(
@@ -597,7 +604,7 @@ class AsyncTheTool:
597
604
  metadata = ToolOutputMetadata(
598
605
  tool_name=tool_name,
599
606
  execution_time=perf_counter() - start,
600
- processed_by=self.model,
607
+ processed_by=operator_output.processed_by,
601
608
  token_usage=operator_output.token_usage,
602
609
  )
603
610
  tool_output = ToolOutput(
@@ -683,7 +690,7 @@ class AsyncTheTool:
683
690
  metadata = ToolOutputMetadata(
684
691
  tool_name=tool_name,
685
692
  execution_time=perf_counter() - start,
686
- processed_by=self.model,
693
+ processed_by=operator_output.processed_by,
687
694
  token_usage=operator_output.token_usage,
688
695
  )
689
696
  tool_output = ToolOutput(
@@ -767,7 +774,7 @@ class AsyncTheTool:
767
774
  metadata = ToolOutputMetadata(
768
775
  tool_name=tool_name,
769
776
  execution_time=perf_counter() - start,
770
- processed_by=self.model,
777
+ processed_by=operator_output.processed_by,
771
778
  token_usage=operator_output.token_usage,
772
779
  )
773
780
  tool_output = ToolOutput(
@@ -872,7 +879,7 @@ class AsyncTheTool:
872
879
  metadata = ToolOutputMetadata(
873
880
  tool_name=tool_name,
874
881
  execution_time=perf_counter() - start,
875
- processed_by=self.model,
882
+ processed_by=chunk_operator_output.processed_by,
876
883
  token_usage=token_usage,
877
884
  )
878
885
  tool_output = ToolOutput(
@@ -908,7 +915,7 @@ class AsyncTheTool:
908
915
  metadata = ToolOutputMetadata(
909
916
  tool_name=tool_name,
910
917
  execution_time=perf_counter() - start,
911
- processed_by=self.model,
918
+ processed_by=operator_output.processed_by,
912
919
  token_usage=operator_output.token_usage,
913
920
  )
914
921
  tool_output = ToolOutput(
@@ -994,7 +1001,7 @@ class AsyncTheTool:
994
1001
  metadata = ToolOutputMetadata(
995
1002
  tool_name=tool_name,
996
1003
  execution_time=perf_counter() - start,
997
- processed_by=self.model,
1004
+ processed_by=operator_output.processed_by,
998
1005
  token_usage=operator_output.token_usage,
999
1006
  )
1000
1007
  tool_output = ToolOutput(
@@ -1083,7 +1090,7 @@ class AsyncTheTool:
1083
1090
  metadata = ToolOutputMetadata(
1084
1091
  tool_name=tool_name,
1085
1092
  execution_time=perf_counter() - start,
1086
- processed_by=self.model,
1093
+ processed_by=operator_output.processed_by,
1087
1094
  token_usage=operator_output.token_usage,
1088
1095
  )
1089
1096
  tool_output = ToolOutput(
@@ -1171,7 +1178,7 @@ class AsyncTheTool:
1171
1178
  metadata = ToolOutputMetadata(
1172
1179
  tool_name=tool_name,
1173
1180
  execution_time=perf_counter() - start,
1174
- processed_by=self.model,
1181
+ processed_by=operator_output.processed_by,
1175
1182
  token_usage=operator_output.token_usage,
1176
1183
  )
1177
1184
  tool_output = ToolOutput(
@@ -15,6 +15,15 @@ class BatchTheTool:
15
15
  raise_on_error: bool = True,
16
16
  max_concurrency: int = 5,
17
17
  ):
18
+ """
19
+ Initialize the BatchTheTool instance.
20
+
21
+ Arguments:
22
+ client: An AsyncOpenAI client instance for making asynchronous API calls
23
+ model: The name of the model
24
+ raise_on_error: If True, raises exceptions on errors; if False, logs errors and continues
25
+ max_concurrency: Maximum number of concurrent API requests allowed
26
+ """
18
27
  self.tool = AsyncTheTool(client, model, raise_on_error)
19
28
  self.semaphore = asyncio.Semaphore(max_concurrency)
20
29
 
@@ -30,10 +30,17 @@ class TheTool:
30
30
  model: str,
31
31
  raise_on_error: bool = True,
32
32
  ):
33
+ """
34
+ Initialize the TheTool instance.
35
+
36
+ Args:
37
+ client: An OpenAI client instance for making API calls
38
+ model: The name of the model
39
+ raise_on_error: If True, raises exceptions on errors; if False, logs errors and continues
40
+ """
33
41
  self._operator = Operator(client=client, model=model)
34
42
  self.logger = logging.getLogger(self.__class__.__name__)
35
43
  self.raise_on_error = raise_on_error
36
- self.model = model
37
44
 
38
45
  def categorize(
39
46
  self,
@@ -93,7 +100,7 @@ class TheTool:
93
100
  metadata = ToolOutputMetadata(
94
101
  tool_name=tool_name,
95
102
  execution_time=perf_counter() - start,
96
- processed_by=self.model,
103
+ processed_by=operator_output.processed_by,
97
104
  token_usage=operator_output.token_usage,
98
105
  )
99
106
  tool_output = ToolOutput(
@@ -155,7 +162,7 @@ class TheTool:
155
162
  metadata = ToolOutputMetadata(
156
163
  tool_name=tool_name,
157
164
  execution_time=perf_counter() - start,
158
- processed_by=self.model,
165
+ processed_by=level_operator_output.processed_by,
159
166
  token_usage=token_usage,
160
167
  )
161
168
  tool_output = ToolOutput(
@@ -239,7 +246,7 @@ class TheTool:
239
246
  metadata = ToolOutputMetadata(
240
247
  tool_name=tool_name,
241
248
  execution_time=perf_counter() - start,
242
- processed_by=self.model,
249
+ processed_by=operator_output.processed_by,
243
250
  token_usage=operator_output.token_usage,
244
251
  )
245
252
  tool_output = ToolOutput(
@@ -321,7 +328,7 @@ class TheTool:
321
328
  metadata = ToolOutputMetadata(
322
329
  tool_name=tool_name,
323
330
  execution_time=perf_counter() - start,
324
- processed_by=self.model,
331
+ processed_by=operator_output.processed_by,
325
332
  token_usage=operator_output.token_usage,
326
333
  )
327
334
  tool_output = ToolOutput(
@@ -398,7 +405,7 @@ class TheTool:
398
405
  metadata = ToolOutputMetadata(
399
406
  tool_name=tool_name,
400
407
  execution_time=perf_counter() - start,
401
- processed_by=self.model,
408
+ processed_by=operator_output.processed_by,
402
409
  token_usage=operator_output.token_usage,
403
410
  )
404
411
  tool_output = ToolOutput(
@@ -482,7 +489,7 @@ class TheTool:
482
489
  metadata = ToolOutputMetadata(
483
490
  tool_name=tool_name,
484
491
  execution_time=perf_counter() - start,
485
- processed_by=self.model,
492
+ processed_by=operator_output.processed_by,
486
493
  token_usage=operator_output.token_usage,
487
494
  )
488
495
  tool_output = ToolOutput(
@@ -564,7 +571,7 @@ class TheTool:
564
571
  metadata = ToolOutputMetadata(
565
572
  tool_name=tool_name,
566
573
  execution_time=perf_counter() - start,
567
- processed_by=self.model,
574
+ processed_by=operator_output.processed_by,
568
575
  token_usage=operator_output.token_usage,
569
576
  )
570
577
  tool_output = ToolOutput(
@@ -645,7 +652,7 @@ class TheTool:
645
652
  metadata = ToolOutputMetadata(
646
653
  tool_name=tool_name,
647
654
  execution_time=perf_counter() - start,
648
- processed_by=self.model,
655
+ processed_by=operator_output.processed_by,
649
656
  token_usage=operator_output.token_usage,
650
657
  )
651
658
  tool_output = ToolOutput(
@@ -724,7 +731,7 @@ class TheTool:
724
731
  metadata = ToolOutputMetadata(
725
732
  tool_name=tool_name,
726
733
  execution_time=perf_counter() - start,
727
- processed_by=self.model,
734
+ processed_by=operator_output.processed_by,
728
735
  token_usage=operator_output.token_usage,
729
736
  )
730
737
  tool_output = ToolOutput(
@@ -824,7 +831,7 @@ class TheTool:
824
831
  metadata = ToolOutputMetadata(
825
832
  tool_name=tool_name,
826
833
  execution_time=perf_counter() - start,
827
- processed_by=self.model,
834
+ processed_by=chunk_operator_output.processed_by,
828
835
  token_usage=token_usage,
829
836
  )
830
837
  tool_output = ToolOutput(
@@ -857,7 +864,7 @@ class TheTool:
857
864
  metadata = ToolOutputMetadata(
858
865
  tool_name=tool_name,
859
866
  execution_time=perf_counter() - start,
860
- processed_by=self.model,
867
+ processed_by=operator_output.processed_by,
861
868
  token_usage=operator_output.token_usage,
862
869
  )
863
870
  tool_output = ToolOutput(
@@ -938,7 +945,7 @@ class TheTool:
938
945
  metadata = ToolOutputMetadata(
939
946
  tool_name=tool_name,
940
947
  execution_time=perf_counter() - start,
941
- processed_by=self.model,
948
+ processed_by=operator_output.processed_by,
942
949
  token_usage=operator_output.token_usage,
943
950
  )
944
951
  tool_output = ToolOutput(
@@ -1022,7 +1029,7 @@ class TheTool:
1022
1029
  metadata = ToolOutputMetadata(
1023
1030
  tool_name=tool_name,
1024
1031
  execution_time=perf_counter() - start,
1025
- processed_by=self.model,
1032
+ processed_by=operator_output.processed_by,
1026
1033
  token_usage=operator_output.token_usage,
1027
1034
  )
1028
1035
  tool_output = ToolOutput(
@@ -1105,7 +1112,7 @@ class TheTool:
1105
1112
  metadata = ToolOutputMetadata(
1106
1113
  tool_name=tool_name,
1107
1114
  execution_time=perf_counter() - start,
1108
- processed_by=self.model,
1115
+ processed_by=operator_output.processed_by,
1109
1116
  token_usage=operator_output.token_usage,
1110
1117
  )
1111
1118
  tool_output = ToolOutput(