hamtaa-texttools 1.1.23__tar.gz → 1.3.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (49)
  1. {hamtaa_texttools-1.1.23/hamtaa_texttools.egg-info → hamtaa_texttools-1.3.0}/PKG-INFO +16 -26
  2. {hamtaa_texttools-1.1.23 → hamtaa_texttools-1.3.0}/README.md +5 -1
  3. {hamtaa_texttools-1.1.23 → hamtaa_texttools-1.3.0/hamtaa_texttools.egg-info}/PKG-INFO +16 -26
  4. {hamtaa_texttools-1.1.23 → hamtaa_texttools-1.3.0}/hamtaa_texttools.egg-info/SOURCES.txt +13 -12
  5. {hamtaa_texttools-1.1.23 → hamtaa_texttools-1.3.0}/hamtaa_texttools.egg-info/requires.txt +1 -1
  6. hamtaa_texttools-1.3.0/pyproject.toml +45 -0
  7. {hamtaa_texttools-1.1.23 → hamtaa_texttools-1.3.0}/tests/test_all_async_tools.py +5 -2
  8. {hamtaa_texttools-1.1.23 → hamtaa_texttools-1.3.0}/texttools/__init__.py +4 -4
  9. hamtaa_texttools-1.3.0/texttools/batch/__init__.py +0 -0
  10. hamtaa_texttools-1.1.23/texttools/batch/batch_config.py → hamtaa_texttools-1.3.0/texttools/batch/config.py +2 -2
  11. hamtaa_texttools-1.1.23/texttools/batch/batch_manager.py → hamtaa_texttools-1.3.0/texttools/batch/manager.py +3 -3
  12. hamtaa_texttools-1.1.23/texttools/batch/batch_runner.py → hamtaa_texttools-1.3.0/texttools/batch/runner.py +6 -6
  13. hamtaa_texttools-1.3.0/texttools/core/__init__.py +0 -0
  14. hamtaa_texttools-1.3.0/texttools/core/engine.py +264 -0
  15. hamtaa_texttools-1.3.0/texttools/core/internal_models.py +58 -0
  16. {hamtaa_texttools-1.1.23/texttools/internals → hamtaa_texttools-1.3.0/texttools/core/operators}/async_operator.py +4 -10
  17. {hamtaa_texttools-1.1.23/texttools/internals → hamtaa_texttools-1.3.0/texttools/core/operators}/sync_operator.py +4 -10
  18. hamtaa_texttools-1.3.0/texttools/models.py +88 -0
  19. hamtaa_texttools-1.3.0/texttools/py.typed +0 -0
  20. hamtaa_texttools-1.3.0/texttools/tools/__init__.py +0 -0
  21. {hamtaa_texttools-1.1.23 → hamtaa_texttools-1.3.0}/texttools/tools/async_tools.py +420 -348
  22. {hamtaa_texttools-1.1.23 → hamtaa_texttools-1.3.0}/texttools/tools/sync_tools.py +113 -111
  23. hamtaa_texttools-1.1.23/MANIFEST.in +0 -2
  24. hamtaa_texttools-1.1.23/pyproject.toml +0 -34
  25. hamtaa_texttools-1.1.23/texttools/internals/models.py +0 -150
  26. hamtaa_texttools-1.1.23/texttools/internals/operator_utils.py +0 -73
  27. hamtaa_texttools-1.1.23/texttools/internals/prompt_loader.py +0 -86
  28. hamtaa_texttools-1.1.23/texttools/internals/text_to_chunks.py +0 -97
  29. hamtaa_texttools-1.1.23/texttools/prompts/README.md +0 -35
  30. {hamtaa_texttools-1.1.23 → hamtaa_texttools-1.3.0}/LICENSE +0 -0
  31. {hamtaa_texttools-1.1.23 → hamtaa_texttools-1.3.0}/hamtaa_texttools.egg-info/dependency_links.txt +0 -0
  32. {hamtaa_texttools-1.1.23 → hamtaa_texttools-1.3.0}/hamtaa_texttools.egg-info/top_level.txt +0 -0
  33. {hamtaa_texttools-1.1.23 → hamtaa_texttools-1.3.0}/setup.cfg +0 -0
  34. {hamtaa_texttools-1.1.23 → hamtaa_texttools-1.3.0}/tests/test_all_tools.py +0 -0
  35. {hamtaa_texttools-1.1.23 → hamtaa_texttools-1.3.0}/tests/test_output_validation.py +0 -0
  36. {hamtaa_texttools-1.1.23/texttools/internals → hamtaa_texttools-1.3.0/texttools/core}/exceptions.py +0 -0
  37. {hamtaa_texttools-1.1.23 → hamtaa_texttools-1.3.0}/texttools/prompts/categorize.yaml +0 -0
  38. {hamtaa_texttools-1.1.23 → hamtaa_texttools-1.3.0}/texttools/prompts/check_fact.yaml +0 -0
  39. {hamtaa_texttools-1.1.23 → hamtaa_texttools-1.3.0}/texttools/prompts/extract_entities.yaml +0 -0
  40. {hamtaa_texttools-1.1.23 → hamtaa_texttools-1.3.0}/texttools/prompts/extract_keywords.yaml +0 -0
  41. {hamtaa_texttools-1.1.23 → hamtaa_texttools-1.3.0}/texttools/prompts/is_question.yaml +0 -0
  42. {hamtaa_texttools-1.1.23 → hamtaa_texttools-1.3.0}/texttools/prompts/merge_questions.yaml +0 -0
  43. {hamtaa_texttools-1.1.23 → hamtaa_texttools-1.3.0}/texttools/prompts/propositionize.yaml +0 -0
  44. {hamtaa_texttools-1.1.23 → hamtaa_texttools-1.3.0}/texttools/prompts/rewrite.yaml +0 -0
  45. {hamtaa_texttools-1.1.23 → hamtaa_texttools-1.3.0}/texttools/prompts/run_custom.yaml +0 -0
  46. {hamtaa_texttools-1.1.23 → hamtaa_texttools-1.3.0}/texttools/prompts/subject_to_question.yaml +0 -0
  47. {hamtaa_texttools-1.1.23 → hamtaa_texttools-1.3.0}/texttools/prompts/summarize.yaml +0 -0
  48. {hamtaa_texttools-1.1.23 → hamtaa_texttools-1.3.0}/texttools/prompts/text_to_question.yaml +0 -0
  49. {hamtaa_texttools-1.1.23 → hamtaa_texttools-1.3.0}/texttools/prompts/translate.yaml +0 -0
@@ -1,34 +1,20 @@
  Metadata-Version: 2.4
  Name: hamtaa-texttools
- Version: 1.1.23
+ Version: 1.3.0
  Summary: A high-level NLP toolkit built on top of modern LLMs.
- Author-email: Tohidi <the.mohammad.tohidi@gmail.com>, Montazer <montazerh82@gmail.com>, Givechi <mohamad.m.givechi@gmail.com>, MoosaviNejad <erfanmoosavi84@gmail.com>, Zareshahi <a.zareshahi1377@gmail.com>
- License: MIT License
-
- Copyright (c) 2025 Hamtaa
-
- Permission is hereby granted, free of charge, to any person obtaining a copy
- of this software and associated documentation files (the "Software"), to deal
- in the Software without restriction, including without limitation the rights
- to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
- copies of the Software, and to permit persons to whom the Software is
- furnished to do so, subject to the following conditions:
-
- The above copyright notice and this permission notice shall be included in all
- copies or substantial portions of the Software.
-
- THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- SOFTWARE.
+ Author-email: Tohidi <the.mohammad.tohidi@gmail.com>, Erfan Moosavi <erfanmoosavi84@gmail.com>, Montazer <montazerh82@gmail.com>, Givechi <mohamad.m.givechi@gmail.com>, Zareshahi <a.zareshahi1377@gmail.com>
+ Maintainer-email: Erfan Moosavi <erfanmoosavi84@gmail.com>, Tohidi <the.mohammad.tohidi@gmail.com>
+ License: MIT
  Keywords: nlp,llm,text-processing,openai
- Requires-Python: >=3.8
+ Classifier: Development Status :: 5 - Production/Stable
+ Classifier: License :: OSI Approved :: MIT License
+ Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
+ Classifier: Topic :: Text Processing
+ Classifier: Operating System :: OS Independent
+ Requires-Python: >=3.9
  Description-Content-Type: text/markdown
  License-File: LICENSE
- Requires-Dist: openai==1.97.1
+ Requires-Dist: openai>=1.97.1
  Requires-Dist: pydantic>=2.0.0
  Requires-Dist: pyyaml>=6.0
  Dynamic: license-file
@@ -87,7 +73,7 @@ pip install -U hamtaa-texttools
 
  ---
 
- ## ⚙️ `with_analysis`, `logprobs`, `output_lang`, `user_prompt`, `temperature`, `validator` and `priority` parameters
+ ## ⚙️ `with_analysis`, `logprobs`, `output_lang`, `user_prompt`, `temperature`, `validator`, `priority` and `timeout` parameters
 
  TextTools provides several optional flags to customize LLM behavior:
 
@@ -108,6 +94,10 @@ TextTools provides several optional flags to customize LLM behavior:
  - **`priority: int (Experimental)`** → Task execution priority level. Affects processing order in queues.
  **Note:** This feature works if it's supported by the model and vLLM.
 
+ - **`timeout: float`** → Maximum time in seconds to wait for the response before raising a timeout error
+ **Note:** This feature only exists in `AsyncTheTool`.
+
+
  ---
 
  ## 🧩 ToolOutput
@@ -52,7 +52,7 @@ pip install -U hamtaa-texttools
 
  ---
 
- ## ⚙️ `with_analysis`, `logprobs`, `output_lang`, `user_prompt`, `temperature`, `validator` and `priority` parameters
+ ## ⚙️ `with_analysis`, `logprobs`, `output_lang`, `user_prompt`, `temperature`, `validator`, `priority` and `timeout` parameters
 
  TextTools provides several optional flags to customize LLM behavior:
 
@@ -73,6 +73,10 @@ TextTools provides several optional flags to customize LLM behavior:
  - **`priority: int (Experimental)`** → Task execution priority level. Affects processing order in queues.
  **Note:** This feature works if it's supported by the model and vLLM.
 
+ - **`timeout: float`** → Maximum time in seconds to wait for the response before raising a timeout error
+ **Note:** This feature only exists in `AsyncTheTool`.
+
+
  ---
 
  ## 🧩 ToolOutput
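
The `timeout` parameter documented above maps onto the new `run_with_timeout` helper in `texttools/core/engine.py` (shown later in this diff) and is only accepted by `AsyncTheTool`. A minimal usage sketch; the `AsyncTheTool` constructor arguments are assumptions, since this diff does not show its signature, while the method name and the `timeout` keyword are confirmed by the test changes below:

```python
import asyncio

from texttools import AsyncTheTool


async def main() -> None:
    # Hypothetical constructor arguments; the diff does not show AsyncTheTool's
    # __init__, only its methods and the new `timeout` keyword.
    tool = AsyncTheTool(model="gpt-4o-mini")
    try:
        # Cancel the request if no response arrives within 2 seconds.
        result = await tool.is_question("Is this a question?", timeout=2.0)
        print(result)
    except TimeoutError as exc:
        print(f"LLM call timed out: {exc}")


asyncio.run(main())
```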
@@ -1,34 +1,20 @@
  Metadata-Version: 2.4
  Name: hamtaa-texttools
- Version: 1.1.23
+ Version: 1.3.0
  Summary: A high-level NLP toolkit built on top of modern LLMs.
- Author-email: Tohidi <the.mohammad.tohidi@gmail.com>, Montazer <montazerh82@gmail.com>, Givechi <mohamad.m.givechi@gmail.com>, MoosaviNejad <erfanmoosavi84@gmail.com>, Zareshahi <a.zareshahi1377@gmail.com>
- License: MIT License
-
- Copyright (c) 2025 Hamtaa
-
- Permission is hereby granted, free of charge, to any person obtaining a copy
- of this software and associated documentation files (the "Software"), to deal
- in the Software without restriction, including without limitation the rights
- to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
- copies of the Software, and to permit persons to whom the Software is
- furnished to do so, subject to the following conditions:
-
- The above copyright notice and this permission notice shall be included in all
- copies or substantial portions of the Software.
-
- THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- SOFTWARE.
+ Author-email: Tohidi <the.mohammad.tohidi@gmail.com>, Erfan Moosavi <erfanmoosavi84@gmail.com>, Montazer <montazerh82@gmail.com>, Givechi <mohamad.m.givechi@gmail.com>, Zareshahi <a.zareshahi1377@gmail.com>
+ Maintainer-email: Erfan Moosavi <erfanmoosavi84@gmail.com>, Tohidi <the.mohammad.tohidi@gmail.com>
+ License: MIT
  Keywords: nlp,llm,text-processing,openai
- Requires-Python: >=3.8
+ Classifier: Development Status :: 5 - Production/Stable
+ Classifier: License :: OSI Approved :: MIT License
+ Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
+ Classifier: Topic :: Text Processing
+ Classifier: Operating System :: OS Independent
+ Requires-Python: >=3.9
  Description-Content-Type: text/markdown
  License-File: LICENSE
- Requires-Dist: openai==1.97.1
+ Requires-Dist: openai>=1.97.1
  Requires-Dist: pydantic>=2.0.0
  Requires-Dist: pyyaml>=6.0
  Dynamic: license-file
@@ -87,7 +73,7 @@ pip install -U hamtaa-texttools
 
  ---
 
- ## ⚙️ `with_analysis`, `logprobs`, `output_lang`, `user_prompt`, `temperature`, `validator` and `priority` parameters
+ ## ⚙️ `with_analysis`, `logprobs`, `output_lang`, `user_prompt`, `temperature`, `validator`, `priority` and `timeout` parameters
 
  TextTools provides several optional flags to customize LLM behavior:
 
@@ -108,6 +94,10 @@ TextTools provides several optional flags to customize LLM behavior:
  - **`priority: int (Experimental)`** → Task execution priority level. Affects processing order in queues.
  **Note:** This feature works if it's supported by the model and vLLM.
 
+ - **`timeout: float`** → Maximum time in seconds to wait for the response before raising a timeout error
+ **Note:** This feature only exists in `AsyncTheTool`.
+
+
  ---
 
  ## 🧩 ToolOutput
@@ -1,5 +1,4 @@
  LICENSE
- MANIFEST.in
  README.md
  pyproject.toml
  hamtaa_texttools.egg-info/PKG-INFO
@@ -11,17 +10,18 @@ tests/test_all_async_tools.py
  tests/test_all_tools.py
  tests/test_output_validation.py
  texttools/__init__.py
- texttools/batch/batch_config.py
- texttools/batch/batch_manager.py
- texttools/batch/batch_runner.py
- texttools/internals/async_operator.py
- texttools/internals/exceptions.py
- texttools/internals/models.py
- texttools/internals/operator_utils.py
- texttools/internals/prompt_loader.py
- texttools/internals/sync_operator.py
- texttools/internals/text_to_chunks.py
- texttools/prompts/README.md
+ texttools/models.py
+ texttools/py.typed
+ texttools/batch/__init__.py
+ texttools/batch/config.py
+ texttools/batch/manager.py
+ texttools/batch/runner.py
+ texttools/core/__init__.py
+ texttools/core/engine.py
+ texttools/core/exceptions.py
+ texttools/core/internal_models.py
+ texttools/core/operators/async_operator.py
+ texttools/core/operators/sync_operator.py
  texttools/prompts/categorize.yaml
  texttools/prompts/check_fact.yaml
  texttools/prompts/extract_entities.yaml
@@ -35,5 +35,6 @@ texttools/prompts/subject_to_question.yaml
  texttools/prompts/summarize.yaml
  texttools/prompts/text_to_question.yaml
  texttools/prompts/translate.yaml
+ texttools/tools/__init__.py
  texttools/tools/async_tools.py
  texttools/tools/sync_tools.py
@@ -1,3 +1,3 @@
- openai==1.97.1
+ openai>=1.97.1
  pydantic>=2.0.0
  pyyaml>=6.0
@@ -0,0 +1,45 @@
+ [build-system]
+ requires = ["setuptools>=61.0", "wheel"]
+ build-backend = "setuptools.build_meta"
+
+ [project]
+ name = "hamtaa-texttools"
+ version = "1.3.0"
+ authors = [
+     {name = "Tohidi", email = "the.mohammad.tohidi@gmail.com"},
+     {name = "Erfan Moosavi", email = "erfanmoosavi84@gmail.com"},
+     {name = "Montazer", email = "montazerh82@gmail.com"},
+     {name = "Givechi", email = "mohamad.m.givechi@gmail.com"},
+     {name = "Zareshahi", email = "a.zareshahi1377@gmail.com"},
+ ]
+ maintainers = [
+     {name = "Erfan Moosavi", email = "erfanmoosavi84@gmail.com"},
+     {name = "Tohidi", email = "the.mohammad.tohidi@gmail.com"},
+ ]
+ description = "A high-level NLP toolkit built on top of modern LLMs."
+ readme = "README.md"
+ license = {text = "MIT"}
+ requires-python = ">=3.9"
+ dependencies = [
+     "openai>=1.97.1",
+     "pydantic>=2.0.0",
+     "pyyaml>=6.0",
+ ]
+ keywords = ["nlp", "llm", "text-processing", "openai"]
+ classifiers = [
+     "Development Status :: 5 - Production/Stable",
+     "License :: OSI Approved :: MIT License",
+     "Topic :: Scientific/Engineering :: Artificial Intelligence",
+     "Topic :: Text Processing",
+     "Operating System :: OS Independent",
+ ]
+
+ [tool.setuptools.packages.find]
+ where = ["."]
+ include = ["texttools*"]
+
+ [tool.setuptools]
+ include-package-data = true
+
+ [tool.setuptools.package-data]
+ "texttools" = ["prompts/*.yaml", "py.typed"]
@@ -20,16 +20,19 @@ async def main():
      category_task = t.categorize(
          "سلام حالت چطوره؟",
          categories=["هیچکدام", "دینی", "فلسفه"],
-         logprobs=True,
+         timeout=0.5,
      )
      keywords_task = t.extract_keywords("Tomorrow, we will be dead by the car crash")
-     entities_task = t.extract_entities("We will be dead by the car crash")
+     entities_task = t.extract_entities(
+         "We will be dead by the car crash", entities=["EVENT"]
+     )
      detection_task = t.is_question("We will be dead by the car crash")
      question_task = t.text_to_question("We will be dead by the car crash", 2)
      merged_task = t.merge_questions(
          ["چرا ما موجوداتی اجتماعی هستیم؟", "چرا باید در کنار هم زندگی کنیم؟"],
          mode="default",
          with_analysis=True,
+         timeout=5.8,
      )
      rewritten_task = t.rewrite(
          "چرا ما انسان ها، موجوداتی اجتماعی هستیم؟",
@@ -1,7 +1,7 @@
- from .tools.sync_tools import TheTool
+ from .batch.config import BatchConfig
+ from .batch.runner import BatchRunner
+ from .models import CategoryTree
  from .tools.async_tools import AsyncTheTool
- from .internals.models import CategoryTree
- from .batch.batch_runner import BatchRunner
- from .batch.batch_config import BatchConfig
+ from .tools.sync_tools import TheTool
 
  __all__ = ["TheTool", "AsyncTheTool", "CategoryTree", "BatchRunner", "BatchConfig"]
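
Despite the module moves, the five re-exported names are unchanged, so downstream imports keep working:

```python
# Same public surface as 1.1.23; only the internal module paths changed.
from texttools import AsyncTheTool, BatchConfig, BatchRunner, CategoryTree, TheTool
```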
File without changes
@@ -1,6 +1,6 @@
- from typing import Any
- from dataclasses import dataclass
  from collections.abc import Callable
+ from dataclasses import dataclass
+ from typing import Any
 
 
  def export_data(data) -> list[dict[str, str]]:
@@ -1,12 +1,12 @@
  import json
+ import logging
  import uuid
  from pathlib import Path
- from typing import Type, TypeVar, Any
- import logging
+ from typing import Any, Type, TypeVar
 
- from pydantic import BaseModel
  from openai import OpenAI
  from openai.lib._pydantic import to_strict_json_schema
+ from pydantic import BaseModel
 
  # Base Model type for output models
  T = TypeVar("T", bound=BaseModel)
@@ -1,18 +1,18 @@
  import json
+ import logging
  import os
  import time
  from pathlib import Path
- from typing import Type, TypeVar, Any
- import logging
+ from typing import Any, Type, TypeVar
 
  from dotenv import load_dotenv
  from openai import OpenAI
  from pydantic import BaseModel
 
- from texttools.batch.batch_manager import BatchManager
- from texttools.batch.batch_config import BatchConfig
- from texttools.internals.models import Str
- from texttools.internals.exceptions import TextToolsError
+ from ..core.exceptions import TextToolsError
+ from ..core.internal_models import Str
+ from .config import BatchConfig
+ from .manager import BatchManager
 
  # Base Model type for output models
  T = TypeVar("T", bound=BaseModel)
File without changes
@@ -0,0 +1,264 @@
+ import asyncio
+ import math
+ import random
+ import re
+ from functools import lru_cache
+ from pathlib import Path
+
+ import yaml
+
+ from .exceptions import PromptError
+
+
+ class PromptLoader:
+     """
+     Utility for loading and formatting YAML prompt templates.
+     """
+
+     MAIN_TEMPLATE = "main_template"
+     ANALYZE_TEMPLATE = "analyze_template"
+
+     @lru_cache(maxsize=32)
+     def _load_templates(self, prompt_file: str, mode: str | None) -> dict[str, str]:
+         """
+         Loads prompt templates from YAML file with optional mode selection.
+         """
+         try:
+             base_dir = Path(__file__).parent.parent / Path("prompts")
+             prompt_path = base_dir / prompt_file
+
+             if not prompt_path.exists():
+                 raise PromptError(f"Prompt file not found: {prompt_file}")
+
+             data = yaml.safe_load(prompt_path.read_text(encoding="utf-8"))
+
+             if self.MAIN_TEMPLATE not in data:
+                 raise PromptError(f"Missing 'main_template' in {prompt_file}")
+
+             if self.ANALYZE_TEMPLATE not in data:
+                 raise PromptError(f"Missing 'analyze_template' in {prompt_file}")
+
+             if mode and mode not in data.get(self.MAIN_TEMPLATE, {}):
+                 raise PromptError(f"Mode '{mode}' not found in {prompt_file}")
+
+             main_template = (
+                 data[self.MAIN_TEMPLATE][mode]
+                 if mode and isinstance(data[self.MAIN_TEMPLATE], dict)
+                 else data[self.MAIN_TEMPLATE]
+             )
+
+             analyze_template = (
+                 data[self.ANALYZE_TEMPLATE][mode]
+                 if mode and isinstance(data[self.ANALYZE_TEMPLATE], dict)
+                 else data[self.ANALYZE_TEMPLATE]
+             )
+
+             if not main_template or not main_template.strip():
+                 raise PromptError(
+                     f"Empty main_template in {prompt_file}"
+                     + (f" for mode '{mode}'" if mode else "")
+                 )
+
+             return {
+                 self.MAIN_TEMPLATE: main_template,
+                 self.ANALYZE_TEMPLATE: analyze_template,
+             }
+
+         except yaml.YAMLError as e:
+             raise PromptError(f"Invalid YAML in {prompt_file}: {e}")
+         except Exception as e:
+             raise PromptError(f"Failed to load prompt {prompt_file}: {e}")
+
+     def load(
+         self, prompt_file: str, text: str, mode: str, **extra_kwargs
+     ) -> dict[str, str]:
+         try:
+             template_configs = self._load_templates(prompt_file, mode)
+             format_args = {"text": text}
+             format_args.update(extra_kwargs)
+
+             # Inject variables inside each template
+             for key in template_configs.keys():
+                 template_configs[key] = template_configs[key].format(**format_args)
+
+             return template_configs
+
+         except KeyError as e:
+             raise PromptError(f"Missing template variable: {e}")
+         except Exception as e:
+             raise PromptError(f"Failed to format prompt: {e}")
+
+
+ class OperatorUtils:
+     @staticmethod
+     def build_main_prompt(
+         main_template: str,
+         analysis: str | None,
+         output_lang: str | None,
+         user_prompt: str | None,
+     ) -> str:
+         main_prompt = ""
+
+         if analysis:
+             main_prompt += f"Based on this analysis:\n{analysis}\n"
+
+         if output_lang:
+             main_prompt += f"Respond only in the {output_lang} language.\n"
+
+         if user_prompt:
+             main_prompt += f"Consider this instruction {user_prompt}\n"
+
+         main_prompt += main_template
+
+         return main_prompt
+
+     @staticmethod
+     def build_message(prompt: str) -> list[dict[str, str]]:
+         return [{"role": "user", "content": prompt}]
+
+     @staticmethod
+     def extract_logprobs(completion: dict) -> list[dict]:
+         """
+         Extracts and filters token probabilities from completion logprobs.
+         Skips punctuation and structural tokens, returns cleaned probability data.
+         """
+         logprobs_data = []
+
+         ignore_pattern = re.compile(r'^(result|[\s\[\]\{\}",:]+)$')
+
+         for choice in completion.choices:
+             if not getattr(choice, "logprobs", None):
+                 raise ValueError("Your model does not support logprobs")
+
+             for logprob_item in choice.logprobs.content:
+                 if ignore_pattern.match(logprob_item.token):
+                     continue
+                 token_entry = {
+                     "token": logprob_item.token,
+                     "prob": round(math.exp(logprob_item.logprob), 8),
+                     "top_alternatives": [],
+                 }
+                 for alt in logprob_item.top_logprobs:
+                     if ignore_pattern.match(alt.token):
+                         continue
+                     token_entry["top_alternatives"].append(
+                         {
+                             "token": alt.token,
+                             "prob": round(math.exp(alt.logprob), 8),
+                         }
+                     )
+                 logprobs_data.append(token_entry)
+
+         return logprobs_data
+
+     @staticmethod
+     def get_retry_temp(base_temp: float) -> float:
+         delta_temp = random.choice([-1, 1]) * random.uniform(0.1, 0.9)
+         new_temp = base_temp + delta_temp
+
+         return max(0.0, min(new_temp, 1.5))
+
+
+ def text_to_chunks(text: str, size: int, overlap: int) -> list[str]:
+     separators = ["\n\n", "\n", " ", ""]
+     is_separator_regex = False
+     keep_separator = True  # Equivalent to 'start'
+     length_function = len
+     strip_whitespace = True
+     chunk_size = size
+     chunk_overlap = overlap
+
+     def _split_text_with_regex(
+         text: str, separator: str, keep_separator: bool
+     ) -> list[str]:
+         if not separator:
+             return [text]
+         if not keep_separator:
+             return re.split(separator, text)
+         _splits = re.split(f"({separator})", text)
+         splits = [_splits[i] + _splits[i + 1] for i in range(1, len(_splits), 2)]
+         if len(_splits) % 2 == 0:
+             splits += [_splits[-1]]
+         return [_splits[0]] + splits if _splits[0] else splits
+
+     def _join_docs(docs: list[str], separator: str) -> str | None:
+         text = separator.join(docs)
+         if strip_whitespace:
+             text = text.strip()
+         return text if text else None
+
+     def _merge_splits(splits: list[str], separator: str) -> list[str]:
+         separator_len = length_function(separator)
+         docs = []
+         current_doc = []
+         total = 0
+         for d in splits:
+             len_ = length_function(d)
+             if total + len_ + (separator_len if current_doc else 0) > chunk_size:
+                 if total > chunk_size:
+                     pass
+                 if current_doc:
+                     doc = _join_docs(current_doc, separator)
+                     if doc is not None:
+                         docs.append(doc)
+                     while total > chunk_overlap or (
+                         total + len_ + (separator_len if current_doc else 0)
+                         > chunk_size
+                         and total > 0
+                     ):
+                         total -= length_function(current_doc[0]) + (
+                             separator_len if len(current_doc) > 1 else 0
+                         )
+                         current_doc = current_doc[1:]
+             current_doc.append(d)
+             total += len_ + (separator_len if len(current_doc) > 1 else 0)
+         doc = _join_docs(current_doc, separator)
+         if doc is not None:
+             docs.append(doc)
+         return docs
+
+     def _split_text(text: str, separators: list[str]) -> list[str]:
+         final_chunks = []
+         separator = separators[-1]
+         new_separators = []
+         for i, _s in enumerate(separators):
+             separator_ = _s if is_separator_regex else re.escape(_s)
+             if not _s:
+                 separator = _s
+                 break
+             if re.search(separator_, text):
+                 separator = _s
+                 new_separators = separators[i + 1 :]
+                 break
+         separator_ = separator if is_separator_regex else re.escape(separator)
+         splits = _split_text_with_regex(text, separator_, keep_separator)
+         _separator = "" if keep_separator else separator
+         good_splits = []
+         for s in splits:
+             if length_function(s) < chunk_size:
+                 good_splits.append(s)
+             else:
+                 if good_splits:
+                     merged_text = _merge_splits(good_splits, _separator)
+                     final_chunks.extend(merged_text)
+                     good_splits = []
+                 if not new_separators:
+                     final_chunks.append(s)
+                 else:
+                     other_info = _split_text(s, new_separators)
+                     final_chunks.extend(other_info)
+         if good_splits:
+             merged_text = _merge_splits(good_splits, _separator)
+             final_chunks.extend(merged_text)
+         return final_chunks
+
+     return _split_text(text, separators)
+
+
+ async def run_with_timeout(coro, timeout: float | None):
+     if timeout is None:
+         return await coro
+     try:
+         return await asyncio.wait_for(coro, timeout=timeout)
+     except asyncio.TimeoutError:
+         raise TimeoutError(f"Operation exceeded timeout of {timeout} seconds")
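
The new `engine.py` consolidates what previously lived in `prompt_loader.py`, `operator_utils.py`, and `text_to_chunks.py`. A short sketch of the two standalone helpers, imported from the path this diff adds them to:

```python
import asyncio

from texttools.core.engine import run_with_timeout, text_to_chunks

# Recursive splitting: tries "\n\n", then "\n", then " ", then falls back to
# character-level cuts, carrying roughly `overlap` characters between chunks.
chunks = text_to_chunks("first paragraph\n\nsecond paragraph " * 40, size=200, overlap=20)
print([len(c) for c in chunks])  # each chunk stays near the 200-character budget


async def slow() -> str:
    await asyncio.sleep(10)
    return "done"


# run_with_timeout passes through when timeout is None; otherwise it wraps the
# coroutine in asyncio.wait_for and re-raises as the built-in TimeoutError.
try:
    asyncio.run(run_with_timeout(slow(), timeout=0.1))
except TimeoutError as exc:
    print(exc)  # Operation exceeded timeout of 0.1 seconds
```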
@@ -0,0 +1,58 @@
+ from typing import Any, Literal, Type
+
+ from pydantic import BaseModel, Field, create_model
+
+
+ class OperatorOutput(BaseModel):
+     result: Any
+     analysis: str | None
+     logprobs: list[dict[str, Any]] | None
+
+
+ class Str(BaseModel):
+     result: str = Field(..., description="The output string", example="text")
+
+
+ class Bool(BaseModel):
+     result: bool = Field(
+         ..., description="Boolean indicating the output state", example=True
+     )
+
+
+ class ListStr(BaseModel):
+     result: list[str] = Field(
+         ..., description="The output list of strings", example=["text_1", "text_2"]
+     )
+
+
+ class ListDictStrStr(BaseModel):
+     result: list[dict[str, str]] = Field(
+         ...,
+         description="List of dictionaries containing string key-value pairs",
+         example=[{"text": "Mohammad", "type": "PER"}, {"text": "Iran", "type": "LOC"}],
+     )
+
+
+ class ReasonListStr(BaseModel):
+     reason: str = Field(..., description="Thinking process that led to the output")
+     result: list[str] = Field(
+         ..., description="The output list of strings", example=["text_1", "text_2"]
+     )
+
+
+ # This function is needed to create CategorizerOutput with dynamic categories
+ def create_dynamic_model(allowed_values: list[str]) -> Type[BaseModel]:
+     literal_type = Literal[*allowed_values]
+
+     CategorizerOutput = create_model(
+         "CategorizerOutput",
+         reason=(
+             str,
+             Field(
+                 ..., description="Explanation of why the input belongs to the category"
+             ),
+         ),
+         result=(literal_type, Field(..., description="Predicted category label")),
+     )
+
+     return CategorizerOutput
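
`create_dynamic_model` builds the categorizer's output schema at call time so that `result` is constrained to the caller's category list. Note that `Literal[*allowed_values]` uses PEP 646 unpacking in a subscript, which needs Python 3.11+, despite the declared `requires-python = ">=3.9"`. A usage sketch with illustrative category names:

```python
from pydantic import ValidationError

from texttools.core.internal_models import create_dynamic_model

CategorizerOutput = create_dynamic_model(["religion", "philosophy", "none"])

# `result` is validated against Literal["religion", "philosophy", "none"],
# so labels outside the category set are rejected.
ok = CategorizerOutput(reason="mentions prayer and scripture", result="religion")
print(ok.result)

try:
    CategorizerOutput(reason="talks about football", result="sports")
except ValidationError as exc:
    print("rejected:", exc.error_count(), "error(s)")
```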