hamtaa-texttools 1.1.13__py3-none-any.whl → 1.1.14__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,9 +1,10 @@
1
- from typing import Literal, Any, Callable
1
+ from typing import Literal, Any
2
+ from collections.abc import Callable
2
3
 
3
4
  from openai import OpenAI
4
5
 
5
6
  from texttools.tools.internals.sync_operator import Operator
6
- import texttools.tools.internals.output_models as OM
7
+ import texttools.tools.internals.models as Models
7
8
 
8
9
 
9
10
  class TheTool:
@@ -27,19 +28,23 @@ class TheTool:
27
28
  def categorize(
28
29
  self,
29
30
  text: str,
31
+ categories: list[str] | Models.CategoryTree,
30
32
  with_analysis: bool = False,
31
33
  user_prompt: str | None = None,
32
34
  temperature: float | None = 0.0,
33
35
  logprobs: bool = False,
34
36
  top_logprobs: int | None = None,
37
+ mode: Literal["category_list", "category_tree"] = "category_list",
35
38
  validator: Callable[[Any], bool] | None = None,
36
39
  max_validation_retries: int | None = None,
37
- ) -> OM.ToolOutput:
40
+ priority: int | None = 0,
41
+ ) -> Models.ToolOutput:
38
42
  """
39
- Categorize a text into a single Islamic studies domain category.
43
+ Categorize a text into one category from a flat list, or into a path of categories chosen level by level through a category tree.
40
44
 
41
45
  Arguments:
42
46
  text: The input text to categorize
47
+ categories: The candidate categories to give to the LLM: a list of category names, or a CategoryTree when mode is "category_tree"
43
48
  with_analysis: Whether to include detailed reasoning analysis
44
49
  user_prompt: Additional instructions for the categorization
45
50
  temperature: Controls randomness (0.0 = deterministic, 1.0 = creative)
@@ -47,30 +52,104 @@ class TheTool:
47
52
  top_logprobs: Number of top token alternatives to return if logprobs enabled
48
53
  validator: Custom validation function to validate the output
49
54
  max_validation_retries: Maximum number of retry attempts if validation fails
55
+ priority: Task execution priority (if enabled by vLLM and model)
50
56
 
51
57
  Returns:
52
58
  ToolOutput: Object containing:
53
- - result (str): The assigned Islamic studies category
59
+ - result (str | list[str]): The assigned category, or in "category_tree" mode the list of category names chosen at each tree level
54
60
  - logprobs (list | None): Probability data if logprobs enabled
55
61
  - analysis (str | None): Detailed reasoning if with_analysis enabled
56
62
  - errors (list(str) | None): Errors occurred during tool call
57
63
  """
58
- return self._operator.run(
59
- # User parameters
60
- text=text,
61
- with_analysis=with_analysis,
62
- user_prompt=user_prompt,
63
- temperature=temperature,
64
- logprobs=logprobs,
65
- top_logprobs=top_logprobs,
66
- validator=validator,
67
- max_validation_retries=max_validation_retries,
68
- # Internal parameters
69
- prompt_file="categorizer.yaml",
70
- output_model=OM.CategorizerOutput,
71
- mode=None,
72
- output_lang=None,
73
- )
64
+ if mode == "category_tree":
65
+ # Initializations
66
+ output = Models.ToolOutput()
67
+ levels = categories.level_count()
68
+ parent_id = 0
69
+ final_output = []
70
+
71
+ for _ in range(levels):
72
+ # Get child nodes for current parent
73
+ parent_node = categories.find_node(parent_id)
74
+ children = categories.find_children(parent_node)
75
+
76
+ # Check if child nodes exist
77
+ if not children:
78
+ output.errors.append(
79
+ f"No categories found for parent_id {parent_id} in the tree"
80
+ )
81
+ return output
82
+
83
+ # Extract category names and descriptions
84
+ category_list = [
85
+ f"Category Name: {node.name}, Description: {node.description}"
86
+ for node in children
87
+ ]
88
+ category_names = [node.name for node in children]
89
+
90
+ # Run categorization for this level
91
+ level_output = self._operator.run(
92
+ # User parameters
93
+ text=text,
94
+ category_list=category_list,
95
+ with_analysis=with_analysis,
96
+ user_prompt=user_prompt,
97
+ temperature=temperature,
98
+ logprobs=logprobs,
99
+ top_logprobs=top_logprobs,
100
+ mode=mode,
101
+ validator=validator,
102
+ max_validation_retries=max_validation_retries,
103
+ # Internal parameters
104
+ prompt_file="categorize.yaml",
105
+ output_model=Models.create_dynamic_model(category_names),
106
+ output_lang=None,
107
+ )
108
+
109
+ # Check for errors from operator
110
+ if level_output.errors:
111
+ output.errors.extend(level_output.errors)
112
+ return output
113
+
114
+ # Get the chosen category
115
+ chosen_category = level_output.result
116
+
117
+ # Find the corresponding node
118
+ parent_node = categories.find_node(chosen_category)
119
+ if parent_node is None:
120
+ output.errors.append(
121
+ f"Category '{chosen_category}' not found in tree after selection"
122
+ )
123
+ return output
124
+
125
+ parent_id = parent_node.node_id
126
+ final_output.append(parent_node.name)
127
+
128
+ # Copy analysis/logprobs from the last level's output
129
+ output.analysis = level_output.analysis
130
+ output.logprobs = level_output.logprobs
131
+
132
+ output.result = final_output
133
+ return output
134
+
135
+ else:
136
+ return self._operator.run(
137
+ # User parameters
138
+ text=text,
139
+ category_list=categories,
140
+ with_analysis=with_analysis,
141
+ user_prompt=user_prompt,
142
+ temperature=temperature,
143
+ logprobs=logprobs,
144
+ top_logprobs=top_logprobs,
145
+ mode=mode,
146
+ validator=validator,
147
+ max_validation_retries=max_validation_retries,
148
+ # Internal parameters
149
+ prompt_file="categorize.yaml",
150
+ output_model=Models.create_dynamic_model(categories),
151
+ output_lang=None,
152
+ )
74
153
 
75
154
  def extract_keywords(
76
155
  self,
@@ -81,9 +160,12 @@ class TheTool:
81
160
  temperature: float | None = 0.0,
82
161
  logprobs: bool = False,
83
162
  top_logprobs: int | None = None,
163
+ mode: Literal["auto", "threshold", "count"] = "auto",
164
+ number_of_keywords: int | None = None,
84
165
  validator: Callable[[Any], bool] | None = None,
85
166
  max_validation_retries: int | None = None,
86
- ) -> OM.ToolOutput:
167
+ priority: int | None = 0,
168
+ ) -> Models.ToolOutput:
87
169
  """
88
170
  Extract salient keywords from text.
89
171
 
@@ -97,6 +179,7 @@ class TheTool:
97
179
  top_logprobs: Number of top token alternatives to return if logprobs enabled
98
180
  validator: Custom validation function to validate the output
99
181
  max_validation_retries: Maximum number of retry attempts if validation fails
182
+ priority: Task execution priority (if enabled by vLLM and model)
100
183
 
101
184
  Returns:
102
185
  ToolOutput: Object containing:
@@ -114,12 +197,14 @@ class TheTool:
114
197
  temperature=temperature,
115
198
  logprobs=logprobs,
116
199
  top_logprobs=top_logprobs,
200
+ mode=mode,
201
+ number_of_keywords=number_of_keywords,
117
202
  validator=validator,
118
203
  max_validation_retries=max_validation_retries,
119
204
  # Internal parameters
120
205
  prompt_file="extract_keywords.yaml",
121
- output_model=OM.ListStrOutput,
122
- mode=None,
206
+ output_model=Models.ListStrOutput,
207
+ priority=priority,
123
208
  )
124
209
 
125
210
  def extract_entities(
@@ -133,7 +218,8 @@ class TheTool:
133
218
  top_logprobs: int | None = None,
134
219
  validator: Callable[[Any], bool] | None = None,
135
220
  max_validation_retries: int | None = None,
136
- ) -> OM.ToolOutput:
221
+ priority: int | None = 0,
222
+ ) -> Models.ToolOutput:
137
223
  """
138
224
  Perform Named Entity Recognition (NER) over the input text.
139
225
 
@@ -147,6 +233,7 @@ class TheTool:
147
233
  top_logprobs: Number of top token alternatives to return if logprobs enabled
148
234
  validator: Custom validation function to validate the output
149
235
  max_validation_retries: Maximum number of retry attempts if validation fails
236
+ priority: Task execution priority (if enabled by vLLM and model)
150
237
 
151
238
  Returns:
152
239
  ToolOutput: Object containing:
@@ -168,8 +255,9 @@ class TheTool:
168
255
  max_validation_retries=max_validation_retries,
169
256
  # Internal parameters
170
257
  prompt_file="extract_entities.yaml",
171
- output_model=OM.ListDictStrStrOutput,
258
+ output_model=Models.ListDictStrStrOutput,
172
259
  mode=None,
260
+ priority=priority,
173
261
  )
174
262
 
175
263
  def is_question(
@@ -182,7 +270,8 @@ class TheTool:
182
270
  top_logprobs: int | None = None,
183
271
  validator: Callable[[Any], bool] | None = None,
184
272
  max_validation_retries: int | None = None,
185
- ) -> OM.ToolOutput:
273
+ priority: int | None = 0,
274
+ ) -> Models.ToolOutput:
186
275
  """
187
276
  Detect if the input is phrased as a question.
188
277
 
@@ -195,6 +284,7 @@ class TheTool:
195
284
  top_logprobs: Number of top token alternatives to return if logprobs enabled
196
285
  validator: Custom validation function to validate the output
197
286
  max_validation_retries: Maximum number of retry attempts if validation fails
287
+ priority: Task execution priority (if enabled by vLLM and model)
198
288
 
199
289
  Returns:
200
290
  ToolOutput: Object containing:
@@ -215,9 +305,10 @@ class TheTool:
215
305
  max_validation_retries=max_validation_retries,
216
306
  # Internal parameters
217
307
  prompt_file="is_question.yaml",
218
- output_model=OM.BoolOutput,
308
+ output_model=Models.BoolOutput,
219
309
  mode=None,
220
310
  output_lang=None,
311
+ priority=priority,
221
312
  )
222
313
 
223
314
  def text_to_question(
@@ -231,7 +322,8 @@ class TheTool:
231
322
  top_logprobs: int | None = None,
232
323
  validator: Callable[[Any], bool] | None = None,
233
324
  max_validation_retries: int | None = None,
234
- ) -> OM.ToolOutput:
325
+ priority: int | None = 0,
326
+ ) -> Models.ToolOutput:
235
327
  """
236
328
  Generate a single question from the given text.
237
329
 
@@ -245,6 +337,7 @@ class TheTool:
245
337
  top_logprobs: Number of top token alternatives to return if logprobs enabled
246
338
  validator: Custom validation function to validate the output
247
339
  max_validation_retries: Maximum number of retry attempts if validation fails
340
+ priority: Task execution priority (if enabled by vLLM and model)
248
341
 
249
342
  Returns:
250
343
  ToolOutput: Object containing:
@@ -266,8 +359,9 @@ class TheTool:
266
359
  max_validation_retries=max_validation_retries,
267
360
  # Internal parameters
268
361
  prompt_file="text_to_question.yaml",
269
- output_model=OM.StrOutput,
362
+ output_model=Models.StrOutput,
270
363
  mode=None,
364
+ priority=priority,
271
365
  )
272
366
 
273
367
  def merge_questions(
@@ -282,7 +376,8 @@ class TheTool:
282
376
  mode: Literal["default", "reason"] = "default",
283
377
  validator: Callable[[Any], bool] | None = None,
284
378
  max_validation_retries: int | None = None,
285
- ) -> OM.ToolOutput:
379
+ priority: int | None = 0,
380
+ ) -> Models.ToolOutput:
286
381
  """
287
382
  Merge multiple questions into a single unified question.
288
383
 
@@ -297,6 +392,7 @@ class TheTool:
297
392
  mode: Merging strategy - 'default' for direct merge, 'reason' for reasoned merge
298
393
  validator: Custom validation function to validate the output
299
394
  max_validation_retries: Maximum number of retry attempts if validation fails
395
+ priority: Task execution priority (if enabled by vLLM and model)
300
396
 
301
397
  Returns:
302
398
  ToolOutput: Object containing:
@@ -319,8 +415,9 @@ class TheTool:
319
415
  max_validation_retries=max_validation_retries,
320
416
  # Internal parameters
321
417
  prompt_file="merge_questions.yaml",
322
- output_model=OM.StrOutput,
418
+ output_model=Models.StrOutput,
323
419
  mode=mode,
420
+ priority=priority,
324
421
  )
325
422
 
326
423
  def rewrite(
@@ -335,7 +432,8 @@ class TheTool:
335
432
  mode: Literal["positive", "negative", "hard_negative"] = "positive",
336
433
  validator: Callable[[Any], bool] | None = None,
337
434
  max_validation_retries: int | None = None,
338
- ) -> OM.ToolOutput:
435
+ priority: int | None = 0,
436
+ ) -> Models.ToolOutput:
339
437
  """
340
438
  Rewrite a text with different modes.
341
439
 
@@ -350,6 +448,7 @@ class TheTool:
350
448
  mode: Rewriting mode - 'positive', 'negative', or 'hard_negative'
351
449
  validator: Custom validation function to validate the output
352
450
  max_validation_retries: Maximum number of retry attempts if validation fails
451
+ priority: Task execution priority (if enabled by vLLM and model)
353
452
 
354
453
  Returns:
355
454
  ToolOutput: Object containing:
@@ -371,8 +470,9 @@ class TheTool:
371
470
  max_validation_retries=max_validation_retries,
372
471
  # Internal parameters
373
472
  prompt_file="rewrite.yaml",
374
- output_model=OM.StrOutput,
473
+ output_model=Models.StrOutput,
375
474
  mode=mode,
475
+ priority=priority,
376
476
  )
377
477
 
378
478
  def subject_to_question(
@@ -387,7 +487,8 @@ class TheTool:
387
487
  top_logprobs: int | None = None,
388
488
  validator: Callable[[Any], bool] | None = None,
389
489
  max_validation_retries: int | None = None,
390
- ) -> OM.ToolOutput:
490
+ priority: int | None = 0,
491
+ ) -> Models.ToolOutput:
391
492
  """
392
493
  Generate a list of questions about a subject.
393
494
 
@@ -402,6 +503,7 @@ class TheTool:
402
503
  top_logprobs: Number of top token alternatives to return if logprobs enabled
403
504
  validator: Custom validation function to validate the output
404
505
  max_validation_retries: Maximum number of retry attempts if validation fails
506
+ priority: Task execution priority (if enabled by vLLM and model)
405
507
 
406
508
  Returns:
407
509
  ToolOutput: Object containing:
@@ -424,8 +526,9 @@ class TheTool:
424
526
  max_validation_retries=max_validation_retries,
425
527
  # Internal parameters
426
528
  prompt_file="subject_to_question.yaml",
427
- output_model=OM.ReasonListStrOutput,
529
+ output_model=Models.ReasonListStrOutput,
428
530
  mode=None,
531
+ priority=priority,
429
532
  )
430
533
 
431
534
  def summarize(
@@ -439,7 +542,8 @@ class TheTool:
439
542
  top_logprobs: int | None = None,
440
543
  validator: Callable[[Any], bool] | None = None,
441
544
  max_validation_retries: int | None = None,
442
- ) -> OM.ToolOutput:
545
+ priority: int | None = 0,
546
+ ) -> Models.ToolOutput:
443
547
  """
444
548
  Summarize the given subject text.
445
549
 
@@ -453,6 +557,7 @@ class TheTool:
453
557
  top_logprobs: Number of top token alternatives to return if logprobs enabled
454
558
  validator: Custom validation function to validate the output
455
559
  max_validation_retries: Maximum number of retry attempts if validation fails
560
+ priority: Task execution priority (if enabled by vLLM and model)
456
561
 
457
562
  Returns:
458
563
  ToolOutput: Object containing:
@@ -474,8 +579,9 @@ class TheTool:
474
579
  max_validation_retries=max_validation_retries,
475
580
  # Internal parameters
476
581
  prompt_file="summarize.yaml",
477
- output_model=OM.StrOutput,
582
+ output_model=Models.StrOutput,
478
583
  mode=None,
584
+ priority=priority,
479
585
  )
480
586
 
481
587
  def translate(
@@ -489,7 +595,8 @@ class TheTool:
489
595
  top_logprobs: int | None = None,
490
596
  validator: Callable[[Any], bool] | None = None,
491
597
  max_validation_retries: int | None = None,
492
- ) -> OM.ToolOutput:
598
+ priority: int | None = 0,
599
+ ) -> Models.ToolOutput:
493
600
  """
494
601
  Translate text between languages.
495
602
 
@@ -503,6 +610,7 @@ class TheTool:
503
610
  top_logprobs: Number of top token alternatives to return if logprobs enabled
504
611
  validator: Custom validation function to validate the output
505
612
  max_validation_retries: Maximum number of retry attempts if validation fails
613
+ priority: Task execution priority (if enabled by vLLM and model)
506
614
 
507
615
  Returns:
508
616
  ToolOutput: Object containing:
@@ -524,9 +632,63 @@ class TheTool:
524
632
  max_validation_retries=max_validation_retries,
525
633
  # Internal parameters
526
634
  prompt_file="translate.yaml",
527
- output_model=OM.StrOutput,
635
+ output_model=Models.StrOutput,
528
636
  mode=None,
529
637
  output_lang=None,
638
+ priority=priority,
639
+ )
640
+
641
+ def detect_entity(
642
+ self,
643
+ text: str,
644
+ with_analysis: bool = False,
645
+ output_lang: str | None = None,
646
+ user_prompt: str | None = None,
647
+ temperature: float | None = 0.0,
648
+ logprobs: bool = False,
649
+ top_logprobs: int | None = None,
650
+ validator: Callable[[Any], bool] | None = None,
651
+ max_validation_retries: int | None = None,
652
+ priority: int | None = 0,
653
+ ) -> Models.ToolOutput:
654
+ """
655
+ Detect entities in the given text using the detect_entity.yaml prompt.
656
+
657
+ Arguments:
658
+ text: The input text
659
+ with_analysis: Whether to include detailed reasoning analysis
660
+ output_lang: Language for the output
661
+ user_prompt: Additional instructions for entity detection
662
+ temperature: Controls randomness (0.0 = deterministic, 1.0 = creative)
663
+ logprobs: Whether to return token probability information
664
+ top_logprobs: Number of top token alternatives to return if logprobs enabled
665
+ validator: Custom validation function to validate the output
666
+ max_validation_retries: Maximum number of retry attempts if validation fails
667
+ priority: Task execution priority (if enabled by vLLM and model)
668
+
669
+ Returns:
670
+ ToolOutput: Object containing:
671
+ - result (list[Entity]): The entities
672
+ - logprobs (list | None): Probability data if logprobs enabled
673
+ - analysis (str | None): Detailed reasoning if with_analysis enabled
674
+ - errors (list(str) | None): Errors occurred during tool call
675
+ """
676
+ return self._operator.run(
677
+ # User parameters
678
+ text=text,
679
+ with_analysis=with_analysis,
680
+ output_lang=output_lang,
681
+ user_prompt=user_prompt,
682
+ temperature=temperature,
683
+ logprobs=logprobs,
684
+ top_logprobs=top_logprobs,
685
+ validator=validator,
686
+ max_validation_retries=max_validation_retries,
687
+ # Internal parameters
688
+ prompt_file="detect_entity.yaml",
689
+ output_model=Models.EntityDetectorOutput,
690
+ mode=None,
691
+ priority=priority,
530
692
  )
531
693
 
532
694
  def run_custom(
@@ -539,7 +701,8 @@ class TheTool:
539
701
  top_logprobs: int | None = None,
540
702
  validator: Callable[[Any], bool] | None = None,
541
703
  max_validation_retries: int | None = None,
542
- ) -> OM.ToolOutput:
704
+ priority: int | None = 0,
705
+ ) -> Models.ToolOutput:
543
706
  """
544
707
  Custom tool that can do almost anything!
545
708
 
@@ -551,6 +714,7 @@ class TheTool:
551
714
  top_logprobs: Number of top token alternatives to return if logprobs enabled
552
715
  validator: Custom validation function to validate the output
553
716
  max_validation_retries: Maximum number of retry attempts if validation fails
717
+ priority: Task execution priority (if enabled by vLLM and model)
554
718
 
555
719
  Returns:
556
720
  ToolOutput: Object containing:
@@ -575,4 +739,5 @@ class TheTool:
575
739
  user_prompt=None,
576
740
  with_analysis=False,
577
741
  mode=None,
742
+ priority=priority,
578
743
  )
@@ -1,28 +0,0 @@
1
- main_template: |
2
- تو یک متخصص علوم دینی هستی
3
- من یک متن به تو میدهم و تو باید
4
- آن متن را در یکی از دسته بندی های زیر طبقه بندی کنی
5
- دسته بندی ها:
6
- "باورهای دینی",
7
- "اخلاق اسلامی",
8
- "احکام و فقه",
9
- "تاریخ اسلام و شخصیت ها",
10
- "منابع دینی",
11
- "دین و جامعه/سیاست",
12
- "عرفان و معنویت",
13
- "هیچکدام",
14
- فقط با این فرمت json پاسخ بده:
15
- {{
16
- "reason": "<دلیل انتخابت رو به صورت خلاصه بگو>",
17
- "result": "<یکی از دسته بندی ها>"
18
- }}
19
- متنی که باید طبقه بندی کنی:
20
- {input}
21
-
22
- analyze_template: |
23
- ما میخواهیم متنی که داده می شود را طبقه بندی کنیم.
24
- برای بهبود طبقه بندی، نیاز به آنالیز متن داریم.
25
- متنی که داده می شود را آنالیز کن و ایده اصلی و آنالیزی کوتاه از آن را بنویس.
26
- آنالیز باید بسیار خلاصه باشد
27
- نهایتا 20 کلمه
28
- {input}
@@ -1,62 +0,0 @@
1
- from typing import Literal, Any
2
-
3
- from pydantic import BaseModel, Field
4
-
5
-
6
- class ToolOutput(BaseModel):
7
- result: Any = None
8
- analysis: str = ""
9
- logprobs: list[dict[str, Any]] = []
10
- errors: list[str] = []
11
-
12
- def __repr__(self) -> str:
13
- return f"ToolOutput(result_type='{type(self.result)}', result='{self.result}', analysis='{self.analysis}', logprobs='{self.logprobs}', errors='{self.errors}'"
14
-
15
-
16
- class StrOutput(BaseModel):
17
- result: str = Field(..., description="The output string")
18
-
19
-
20
- class BoolOutput(BaseModel):
21
- result: bool = Field(
22
- ..., description="Boolean indicating the output state", example=True
23
- )
24
-
25
-
26
- class ListStrOutput(BaseModel):
27
- result: list[str] = Field(
28
- ..., description="The output list of strings", example=["text_1", "text_2"]
29
- )
30
-
31
-
32
- class ListDictStrStrOutput(BaseModel):
33
- result: list[dict[str, str]] = Field(
34
- ...,
35
- description="List of dictionaries containing string key-value pairs",
36
- example=[{"text": "Mohammad", "type": "PER"}],
37
- )
38
-
39
-
40
- class ReasonListStrOutput(BaseModel):
41
- reason: str = Field(..., description="Thinking process that led to the output")
42
- result: list[str] = Field(..., description="The output list of strings")
43
-
44
-
45
- class CategorizerOutput(BaseModel):
46
- reason: str = Field(
47
- ..., description="Explanation of why the input belongs to the category"
48
- )
49
- result: Literal[
50
- "باورهای دینی",
51
- "اخلاق اسلامی",
52
- "احکام و فقه",
53
- "تاریخ اسلام و شخصیت ها",
54
- "منابع دینی",
55
- "دین و جامعه/سیاست",
56
- "عرفان و معنویت",
57
- "هیچکدام",
58
- ] = Field(
59
- ...,
60
- description="Predicted category label",
61
- example="اخلاق اسلامی",
62
- )