hamtaa-texttools 1.1.23__tar.gz → 1.2.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (49) hide show
  1. {hamtaa_texttools-1.1.23/hamtaa_texttools.egg-info → hamtaa_texttools-1.2.0}/PKG-INFO +11 -25
  2. {hamtaa_texttools-1.1.23 → hamtaa_texttools-1.2.0/hamtaa_texttools.egg-info}/PKG-INFO +11 -25
  3. {hamtaa_texttools-1.1.23 → hamtaa_texttools-1.2.0}/hamtaa_texttools.egg-info/SOURCES.txt +13 -12
  4. {hamtaa_texttools-1.1.23 → hamtaa_texttools-1.2.0}/hamtaa_texttools.egg-info/requires.txt +1 -1
  5. hamtaa_texttools-1.2.0/pyproject.toml +45 -0
  6. {hamtaa_texttools-1.1.23 → hamtaa_texttools-1.2.0}/texttools/__init__.py +4 -4
  7. hamtaa_texttools-1.2.0/texttools/batch/__init__.py +0 -0
  8. hamtaa_texttools-1.1.23/texttools/batch/batch_config.py → hamtaa_texttools-1.2.0/texttools/batch/config.py +2 -2
  9. hamtaa_texttools-1.1.23/texttools/batch/batch_manager.py → hamtaa_texttools-1.2.0/texttools/batch/manager.py +3 -3
  10. hamtaa_texttools-1.1.23/texttools/batch/batch_runner.py → hamtaa_texttools-1.2.0/texttools/batch/runner.py +6 -6
  11. hamtaa_texttools-1.2.0/texttools/core/__init__.py +0 -0
  12. hamtaa_texttools-1.2.0/texttools/core/engine.py +254 -0
  13. hamtaa_texttools-1.2.0/texttools/core/internal_models.py +58 -0
  14. {hamtaa_texttools-1.1.23/texttools/internals → hamtaa_texttools-1.2.0/texttools/core/operators}/async_operator.py +4 -10
  15. {hamtaa_texttools-1.1.23/texttools/internals → hamtaa_texttools-1.2.0/texttools/core/operators}/sync_operator.py +4 -10
  16. hamtaa_texttools-1.2.0/texttools/models.py +88 -0
  17. hamtaa_texttools-1.2.0/texttools/py.typed +0 -0
  18. hamtaa_texttools-1.2.0/texttools/tools/__init__.py +0 -0
  19. {hamtaa_texttools-1.1.23 → hamtaa_texttools-1.2.0}/texttools/tools/async_tools.py +97 -94
  20. {hamtaa_texttools-1.1.23 → hamtaa_texttools-1.2.0}/texttools/tools/sync_tools.py +97 -94
  21. hamtaa_texttools-1.1.23/MANIFEST.in +0 -2
  22. hamtaa_texttools-1.1.23/pyproject.toml +0 -34
  23. hamtaa_texttools-1.1.23/texttools/internals/models.py +0 -150
  24. hamtaa_texttools-1.1.23/texttools/internals/operator_utils.py +0 -73
  25. hamtaa_texttools-1.1.23/texttools/internals/prompt_loader.py +0 -86
  26. hamtaa_texttools-1.1.23/texttools/internals/text_to_chunks.py +0 -97
  27. hamtaa_texttools-1.1.23/texttools/prompts/README.md +0 -35
  28. {hamtaa_texttools-1.1.23 → hamtaa_texttools-1.2.0}/LICENSE +0 -0
  29. {hamtaa_texttools-1.1.23 → hamtaa_texttools-1.2.0}/README.md +0 -0
  30. {hamtaa_texttools-1.1.23 → hamtaa_texttools-1.2.0}/hamtaa_texttools.egg-info/dependency_links.txt +0 -0
  31. {hamtaa_texttools-1.1.23 → hamtaa_texttools-1.2.0}/hamtaa_texttools.egg-info/top_level.txt +0 -0
  32. {hamtaa_texttools-1.1.23 → hamtaa_texttools-1.2.0}/setup.cfg +0 -0
  33. {hamtaa_texttools-1.1.23 → hamtaa_texttools-1.2.0}/tests/test_all_async_tools.py +0 -0
  34. {hamtaa_texttools-1.1.23 → hamtaa_texttools-1.2.0}/tests/test_all_tools.py +0 -0
  35. {hamtaa_texttools-1.1.23 → hamtaa_texttools-1.2.0}/tests/test_output_validation.py +0 -0
  36. {hamtaa_texttools-1.1.23/texttools/internals → hamtaa_texttools-1.2.0/texttools/core}/exceptions.py +0 -0
  37. {hamtaa_texttools-1.1.23 → hamtaa_texttools-1.2.0}/texttools/prompts/categorize.yaml +0 -0
  38. {hamtaa_texttools-1.1.23 → hamtaa_texttools-1.2.0}/texttools/prompts/check_fact.yaml +0 -0
  39. {hamtaa_texttools-1.1.23 → hamtaa_texttools-1.2.0}/texttools/prompts/extract_entities.yaml +0 -0
  40. {hamtaa_texttools-1.1.23 → hamtaa_texttools-1.2.0}/texttools/prompts/extract_keywords.yaml +0 -0
  41. {hamtaa_texttools-1.1.23 → hamtaa_texttools-1.2.0}/texttools/prompts/is_question.yaml +0 -0
  42. {hamtaa_texttools-1.1.23 → hamtaa_texttools-1.2.0}/texttools/prompts/merge_questions.yaml +0 -0
  43. {hamtaa_texttools-1.1.23 → hamtaa_texttools-1.2.0}/texttools/prompts/propositionize.yaml +0 -0
  44. {hamtaa_texttools-1.1.23 → hamtaa_texttools-1.2.0}/texttools/prompts/rewrite.yaml +0 -0
  45. {hamtaa_texttools-1.1.23 → hamtaa_texttools-1.2.0}/texttools/prompts/run_custom.yaml +0 -0
  46. {hamtaa_texttools-1.1.23 → hamtaa_texttools-1.2.0}/texttools/prompts/subject_to_question.yaml +0 -0
  47. {hamtaa_texttools-1.1.23 → hamtaa_texttools-1.2.0}/texttools/prompts/summarize.yaml +0 -0
  48. {hamtaa_texttools-1.1.23 → hamtaa_texttools-1.2.0}/texttools/prompts/text_to_question.yaml +0 -0
  49. {hamtaa_texttools-1.1.23 → hamtaa_texttools-1.2.0}/texttools/prompts/translate.yaml +0 -0
@@ -1,34 +1,20 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: hamtaa-texttools
3
- Version: 1.1.23
3
+ Version: 1.2.0
4
4
  Summary: A high-level NLP toolkit built on top of modern LLMs.
5
- Author-email: Tohidi <the.mohammad.tohidi@gmail.com>, Montazer <montazerh82@gmail.com>, Givechi <mohamad.m.givechi@gmail.com>, MoosaviNejad <erfanmoosavi84@gmail.com>, Zareshahi <a.zareshahi1377@gmail.com>
6
- License: MIT License
7
-
8
- Copyright (c) 2025 Hamtaa
9
-
10
- Permission is hereby granted, free of charge, to any person obtaining a copy
11
- of this software and associated documentation files (the "Software"), to deal
12
- in the Software without restriction, including without limitation the rights
13
- to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
14
- copies of the Software, and to permit persons to whom the Software is
15
- furnished to do so, subject to the following conditions:
16
-
17
- The above copyright notice and this permission notice shall be included in all
18
- copies or substantial portions of the Software.
19
-
20
- THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
21
- IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
22
- FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
23
- AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
24
- LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
25
- OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
26
- SOFTWARE.
5
+ Author-email: Tohidi <the.mohammad.tohidi@gmail.com>, Erfan Moosavi <erfanmoosavi84@gmail.com>, Montazer <montazerh82@gmail.com>, Givechi <mohamad.m.givechi@gmail.com>, Zareshahi <a.zareshahi1377@gmail.com>
6
+ Maintainer-email: Erfan Moosavi <erfanmoosavi84@gmail.com>, Tohidi <the.mohammad.tohidi@gmail.com>
7
+ License: MIT
27
8
  Keywords: nlp,llm,text-processing,openai
28
- Requires-Python: >=3.8
9
+ Classifier: Development Status :: 5 - Production/Stable
10
+ Classifier: License :: OSI Approved :: MIT License
11
+ Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
12
+ Classifier: Topic :: Text Processing
13
+ Classifier: Operating System :: OS Independent
14
+ Requires-Python: >=3.9
29
15
  Description-Content-Type: text/markdown
30
16
  License-File: LICENSE
31
- Requires-Dist: openai==1.97.1
17
+ Requires-Dist: openai>=1.97.1
32
18
  Requires-Dist: pydantic>=2.0.0
33
19
  Requires-Dist: pyyaml>=6.0
34
20
  Dynamic: license-file
@@ -1,34 +1,20 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: hamtaa-texttools
3
- Version: 1.1.23
3
+ Version: 1.2.0
4
4
  Summary: A high-level NLP toolkit built on top of modern LLMs.
5
- Author-email: Tohidi <the.mohammad.tohidi@gmail.com>, Montazer <montazerh82@gmail.com>, Givechi <mohamad.m.givechi@gmail.com>, MoosaviNejad <erfanmoosavi84@gmail.com>, Zareshahi <a.zareshahi1377@gmail.com>
6
- License: MIT License
7
-
8
- Copyright (c) 2025 Hamtaa
9
-
10
- Permission is hereby granted, free of charge, to any person obtaining a copy
11
- of this software and associated documentation files (the "Software"), to deal
12
- in the Software without restriction, including without limitation the rights
13
- to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
14
- copies of the Software, and to permit persons to whom the Software is
15
- furnished to do so, subject to the following conditions:
16
-
17
- The above copyright notice and this permission notice shall be included in all
18
- copies or substantial portions of the Software.
19
-
20
- THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
21
- IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
22
- FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
23
- AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
24
- LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
25
- OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
26
- SOFTWARE.
5
+ Author-email: Tohidi <the.mohammad.tohidi@gmail.com>, Erfan Moosavi <erfanmoosavi84@gmail.com>, Montazer <montazerh82@gmail.com>, Givechi <mohamad.m.givechi@gmail.com>, Zareshahi <a.zareshahi1377@gmail.com>
6
+ Maintainer-email: Erfan Moosavi <erfanmoosavi84@gmail.com>, Tohidi <the.mohammad.tohidi@gmail.com>
7
+ License: MIT
27
8
  Keywords: nlp,llm,text-processing,openai
28
- Requires-Python: >=3.8
9
+ Classifier: Development Status :: 5 - Production/Stable
10
+ Classifier: License :: OSI Approved :: MIT License
11
+ Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
12
+ Classifier: Topic :: Text Processing
13
+ Classifier: Operating System :: OS Independent
14
+ Requires-Python: >=3.9
29
15
  Description-Content-Type: text/markdown
30
16
  License-File: LICENSE
31
- Requires-Dist: openai==1.97.1
17
+ Requires-Dist: openai>=1.97.1
32
18
  Requires-Dist: pydantic>=2.0.0
33
19
  Requires-Dist: pyyaml>=6.0
34
20
  Dynamic: license-file
@@ -1,5 +1,4 @@
1
1
  LICENSE
2
- MANIFEST.in
3
2
  README.md
4
3
  pyproject.toml
5
4
  hamtaa_texttools.egg-info/PKG-INFO
@@ -11,17 +10,18 @@ tests/test_all_async_tools.py
11
10
  tests/test_all_tools.py
12
11
  tests/test_output_validation.py
13
12
  texttools/__init__.py
14
- texttools/batch/batch_config.py
15
- texttools/batch/batch_manager.py
16
- texttools/batch/batch_runner.py
17
- texttools/internals/async_operator.py
18
- texttools/internals/exceptions.py
19
- texttools/internals/models.py
20
- texttools/internals/operator_utils.py
21
- texttools/internals/prompt_loader.py
22
- texttools/internals/sync_operator.py
23
- texttools/internals/text_to_chunks.py
24
- texttools/prompts/README.md
13
+ texttools/models.py
14
+ texttools/py.typed
15
+ texttools/batch/__init__.py
16
+ texttools/batch/config.py
17
+ texttools/batch/manager.py
18
+ texttools/batch/runner.py
19
+ texttools/core/__init__.py
20
+ texttools/core/engine.py
21
+ texttools/core/exceptions.py
22
+ texttools/core/internal_models.py
23
+ texttools/core/operators/async_operator.py
24
+ texttools/core/operators/sync_operator.py
25
25
  texttools/prompts/categorize.yaml
26
26
  texttools/prompts/check_fact.yaml
27
27
  texttools/prompts/extract_entities.yaml
@@ -35,5 +35,6 @@ texttools/prompts/subject_to_question.yaml
35
35
  texttools/prompts/summarize.yaml
36
36
  texttools/prompts/text_to_question.yaml
37
37
  texttools/prompts/translate.yaml
38
+ texttools/tools/__init__.py
38
39
  texttools/tools/async_tools.py
39
40
  texttools/tools/sync_tools.py
@@ -1,3 +1,3 @@
1
- openai==1.97.1
1
+ openai>=1.97.1
2
2
  pydantic>=2.0.0
3
3
  pyyaml>=6.0
@@ -0,0 +1,45 @@
1
+ [build-system]
2
+ requires = ["setuptools>=61.0", "wheel"]
3
+ build-backend = "setuptools.build_meta"
4
+
5
+ [project]
6
+ name = "hamtaa-texttools"
7
+ version = "1.2.0"
8
+ authors = [
9
+ {name = "Tohidi", email = "the.mohammad.tohidi@gmail.com"},
10
+ {name = "Erfan Moosavi", email = "erfanmoosavi84@gmail.com"},
11
+ {name = "Montazer", email = "montazerh82@gmail.com"},
12
+ {name = "Givechi", email = "mohamad.m.givechi@gmail.com"},
13
+ {name = "Zareshahi", email = "a.zareshahi1377@gmail.com"},
14
+ ]
15
+ maintainers = [
16
+ {name = "Erfan Moosavi", email = "erfanmoosavi84@gmail.com"},
17
+ {name = "Tohidi", email = "the.mohammad.tohidi@gmail.com"},
18
+ ]
19
+ description = "A high-level NLP toolkit built on top of modern LLMs."
20
+ readme = "README.md"
21
+ license = {text = "MIT"}
22
+ requires-python = ">=3.9"
23
+ dependencies = [
24
+ "openai>=1.97.1",
25
+ "pydantic>=2.0.0",
26
+ "pyyaml>=6.0",
27
+ ]
28
+ keywords = ["nlp", "llm", "text-processing", "openai"]
29
+ classifiers = [
30
+ "Development Status :: 5 - Production/Stable",
31
+ "License :: OSI Approved :: MIT License",
32
+ "Topic :: Scientific/Engineering :: Artificial Intelligence",
33
+ "Topic :: Text Processing",
34
+ "Operating System :: OS Independent",
35
+ ]
36
+
37
+ [tool.setuptools.packages.find]
38
+ where = ["."]
39
+ include = ["texttools*"]
40
+
41
+ [tool.setuptools]
42
+ include-package-data = true
43
+
44
+ [tool.setuptools.package-data]
45
+ "texttools" = ["prompts/*.yaml", "py.typed"]
@@ -1,7 +1,7 @@
1
- from .tools.sync_tools import TheTool
1
+ from .batch.config import BatchConfig
2
+ from .batch.runner import BatchRunner
3
+ from .models import CategoryTree
2
4
  from .tools.async_tools import AsyncTheTool
3
- from .internals.models import CategoryTree
4
- from .batch.batch_runner import BatchRunner
5
- from .batch.batch_config import BatchConfig
5
+ from .tools.sync_tools import TheTool
6
6
 
7
7
  __all__ = ["TheTool", "AsyncTheTool", "CategoryTree", "BatchRunner", "BatchConfig"]
File without changes
@@ -1,6 +1,6 @@
1
- from typing import Any
2
- from dataclasses import dataclass
3
1
  from collections.abc import Callable
2
+ from dataclasses import dataclass
3
+ from typing import Any
4
4
 
5
5
 
6
6
  def export_data(data) -> list[dict[str, str]]:
@@ -1,12 +1,12 @@
1
1
  import json
2
+ import logging
2
3
  import uuid
3
4
  from pathlib import Path
4
- from typing import Type, TypeVar, Any
5
- import logging
5
+ from typing import Any, Type, TypeVar
6
6
 
7
- from pydantic import BaseModel
8
7
  from openai import OpenAI
9
8
  from openai.lib._pydantic import to_strict_json_schema
9
+ from pydantic import BaseModel
10
10
 
11
11
  # Base Model type for output models
12
12
  T = TypeVar("T", bound=BaseModel)
@@ -1,18 +1,18 @@
1
1
  import json
2
+ import logging
2
3
  import os
3
4
  import time
4
5
  from pathlib import Path
5
- from typing import Type, TypeVar, Any
6
- import logging
6
+ from typing import Any, Type, TypeVar
7
7
 
8
8
  from dotenv import load_dotenv
9
9
  from openai import OpenAI
10
10
  from pydantic import BaseModel
11
11
 
12
- from texttools.batch.batch_manager import BatchManager
13
- from texttools.batch.batch_config import BatchConfig
14
- from texttools.internals.models import Str
15
- from texttools.internals.exceptions import TextToolsError
12
+ from ..core.exceptions import TextToolsError
13
+ from ..core.internal_models import Str
14
+ from .config import BatchConfig
15
+ from .manager import BatchManager
16
16
 
17
17
  # Base Model type for output models
18
18
  T = TypeVar("T", bound=BaseModel)
File without changes
@@ -0,0 +1,254 @@
1
+ import math
2
+ import random
3
+ import re
4
+ from functools import lru_cache
5
+ from pathlib import Path
6
+
7
+ import yaml
8
+
9
+ from .exceptions import PromptError
10
+
11
+
12
+ class PromptLoader:
13
+ """
14
+ Utility for loading and formatting YAML prompt templates.
15
+ """
16
+
17
+ MAIN_TEMPLATE = "main_template"
18
+ ANALYZE_TEMPLATE = "analyze_template"
19
+
20
+ @lru_cache(maxsize=32)
21
+ def _load_templates(self, prompt_file: str, mode: str | None) -> dict[str, str]:
22
+ """
23
+ Loads prompt templates from YAML file with optional mode selection.
24
+ """
25
+ try:
26
+ base_dir = Path(__file__).parent.parent / Path("prompts")
27
+ prompt_path = base_dir / prompt_file
28
+
29
+ if not prompt_path.exists():
30
+ raise PromptError(f"Prompt file not found: {prompt_file}")
31
+
32
+ data = yaml.safe_load(prompt_path.read_text(encoding="utf-8"))
33
+
34
+ if self.MAIN_TEMPLATE not in data:
35
+ raise PromptError(f"Missing 'main_template' in {prompt_file}")
36
+
37
+ if self.ANALYZE_TEMPLATE not in data:
38
+ raise PromptError(f"Missing 'analyze_template' in {prompt_file}")
39
+
40
+ if mode and mode not in data.get(self.MAIN_TEMPLATE, {}):
41
+ raise PromptError(f"Mode '{mode}' not found in {prompt_file}")
42
+
43
+ main_template = (
44
+ data[self.MAIN_TEMPLATE][mode]
45
+ if mode and isinstance(data[self.MAIN_TEMPLATE], dict)
46
+ else data[self.MAIN_TEMPLATE]
47
+ )
48
+
49
+ analyze_template = (
50
+ data[self.ANALYZE_TEMPLATE][mode]
51
+ if mode and isinstance(data[self.ANALYZE_TEMPLATE], dict)
52
+ else data[self.ANALYZE_TEMPLATE]
53
+ )
54
+
55
+ if not main_template or not main_template.strip():
56
+ raise PromptError(
57
+ f"Empty main_template in {prompt_file}"
58
+ + (f" for mode '{mode}'" if mode else "")
59
+ )
60
+
61
+ return {
62
+ self.MAIN_TEMPLATE: main_template,
63
+ self.ANALYZE_TEMPLATE: analyze_template,
64
+ }
65
+
66
+ except yaml.YAMLError as e:
67
+ raise PromptError(f"Invalid YAML in {prompt_file}: {e}")
68
+ except Exception as e:
69
+ raise PromptError(f"Failed to load prompt {prompt_file}: {e}")
70
+
71
+ def load(
72
+ self, prompt_file: str, text: str, mode: str, **extra_kwargs
73
+ ) -> dict[str, str]:
74
+ try:
75
+ template_configs = self._load_templates(prompt_file, mode)
76
+ format_args = {"text": text}
77
+ format_args.update(extra_kwargs)
78
+
79
+ # Inject variables inside each template
80
+ for key in template_configs.keys():
81
+ template_configs[key] = template_configs[key].format(**format_args)
82
+
83
+ return template_configs
84
+
85
+ except KeyError as e:
86
+ raise PromptError(f"Missing template variable: {e}")
87
+ except Exception as e:
88
+ raise PromptError(f"Failed to format prompt: {e}")
89
+
90
+
91
+ class OperatorUtils:
92
+ @staticmethod
93
+ def build_main_prompt(
94
+ main_template: str,
95
+ analysis: str | None,
96
+ output_lang: str | None,
97
+ user_prompt: str | None,
98
+ ) -> str:
99
+ main_prompt = ""
100
+
101
+ if analysis:
102
+ main_prompt += f"Based on this analysis:\n{analysis}\n"
103
+
104
+ if output_lang:
105
+ main_prompt += f"Respond only in the {output_lang} language.\n"
106
+
107
+ if user_prompt:
108
+ main_prompt += f"Consider this instruction {user_prompt}\n"
109
+
110
+ main_prompt += main_template
111
+
112
+ return main_prompt
113
+
114
+ @staticmethod
115
+ def build_message(prompt: str) -> list[dict[str, str]]:
116
+ return [{"role": "user", "content": prompt}]
117
+
118
+ @staticmethod
119
+ def extract_logprobs(completion: dict) -> list[dict]:
120
+ """
121
+ Extracts and filters token probabilities from completion logprobs.
122
+ Skips punctuation and structural tokens, returns cleaned probability data.
123
+ """
124
+ logprobs_data = []
125
+
126
+ ignore_pattern = re.compile(r'^(result|[\s\[\]\{\}",:]+)$')
127
+
128
+ for choice in completion.choices:
129
+ if not getattr(choice, "logprobs", None):
130
+ raise ValueError("Your model does not support logprobs")
131
+
132
+ for logprob_item in choice.logprobs.content:
133
+ if ignore_pattern.match(logprob_item.token):
134
+ continue
135
+ token_entry = {
136
+ "token": logprob_item.token,
137
+ "prob": round(math.exp(logprob_item.logprob), 8),
138
+ "top_alternatives": [],
139
+ }
140
+ for alt in logprob_item.top_logprobs:
141
+ if ignore_pattern.match(alt.token):
142
+ continue
143
+ token_entry["top_alternatives"].append(
144
+ {
145
+ "token": alt.token,
146
+ "prob": round(math.exp(alt.logprob), 8),
147
+ }
148
+ )
149
+ logprobs_data.append(token_entry)
150
+
151
+ return logprobs_data
152
+
153
+ @staticmethod
154
+ def get_retry_temp(base_temp: float) -> float:
155
+ delta_temp = random.choice([-1, 1]) * random.uniform(0.1, 0.9)
156
+ new_temp = base_temp + delta_temp
157
+
158
+ return max(0.0, min(new_temp, 1.5))
159
+
160
+
161
+ def text_to_chunks(text: str, size: int, overlap: int) -> list[str]:
162
+ separators = ["\n\n", "\n", " ", ""]
163
+ is_separator_regex = False
164
+ keep_separator = True # Equivalent to 'start'
165
+ length_function = len
166
+ strip_whitespace = True
167
+ chunk_size = size
168
+ chunk_overlap = overlap
169
+
170
+ def _split_text_with_regex(
171
+ text: str, separator: str, keep_separator: bool
172
+ ) -> list[str]:
173
+ if not separator:
174
+ return [text]
175
+ if not keep_separator:
176
+ return re.split(separator, text)
177
+ _splits = re.split(f"({separator})", text)
178
+ splits = [_splits[i] + _splits[i + 1] for i in range(1, len(_splits), 2)]
179
+ if len(_splits) % 2 == 0:
180
+ splits += [_splits[-1]]
181
+ return [_splits[0]] + splits if _splits[0] else splits
182
+
183
+ def _join_docs(docs: list[str], separator: str) -> str | None:
184
+ text = separator.join(docs)
185
+ if strip_whitespace:
186
+ text = text.strip()
187
+ return text if text else None
188
+
189
+ def _merge_splits(splits: list[str], separator: str) -> list[str]:
190
+ separator_len = length_function(separator)
191
+ docs = []
192
+ current_doc = []
193
+ total = 0
194
+ for d in splits:
195
+ len_ = length_function(d)
196
+ if total + len_ + (separator_len if current_doc else 0) > chunk_size:
197
+ if total > chunk_size:
198
+ pass
199
+ if current_doc:
200
+ doc = _join_docs(current_doc, separator)
201
+ if doc is not None:
202
+ docs.append(doc)
203
+ while total > chunk_overlap or (
204
+ total + len_ + (separator_len if current_doc else 0)
205
+ > chunk_size
206
+ and total > 0
207
+ ):
208
+ total -= length_function(current_doc[0]) + (
209
+ separator_len if len(current_doc) > 1 else 0
210
+ )
211
+ current_doc = current_doc[1:]
212
+ current_doc.append(d)
213
+ total += len_ + (separator_len if len(current_doc) > 1 else 0)
214
+ doc = _join_docs(current_doc, separator)
215
+ if doc is not None:
216
+ docs.append(doc)
217
+ return docs
218
+
219
+ def _split_text(text: str, separators: list[str]) -> list[str]:
220
+ final_chunks = []
221
+ separator = separators[-1]
222
+ new_separators = []
223
+ for i, _s in enumerate(separators):
224
+ separator_ = _s if is_separator_regex else re.escape(_s)
225
+ if not _s:
226
+ separator = _s
227
+ break
228
+ if re.search(separator_, text):
229
+ separator = _s
230
+ new_separators = separators[i + 1 :]
231
+ break
232
+ separator_ = separator if is_separator_regex else re.escape(separator)
233
+ splits = _split_text_with_regex(text, separator_, keep_separator)
234
+ _separator = "" if keep_separator else separator
235
+ good_splits = []
236
+ for s in splits:
237
+ if length_function(s) < chunk_size:
238
+ good_splits.append(s)
239
+ else:
240
+ if good_splits:
241
+ merged_text = _merge_splits(good_splits, _separator)
242
+ final_chunks.extend(merged_text)
243
+ good_splits = []
244
+ if not new_separators:
245
+ final_chunks.append(s)
246
+ else:
247
+ other_info = _split_text(s, new_separators)
248
+ final_chunks.extend(other_info)
249
+ if good_splits:
250
+ merged_text = _merge_splits(good_splits, _separator)
251
+ final_chunks.extend(merged_text)
252
+ return final_chunks
253
+
254
+ return _split_text(text, separators)
@@ -0,0 +1,58 @@
1
+ from typing import Any, Literal, Type
2
+
3
+ from pydantic import BaseModel, Field, create_model
4
+
5
+
6
class OperatorOutput(BaseModel):
    """
    Container for what an operator produces for a single call.
    """

    # The produced value; its type is not constrained here.
    result: Any
    # Analysis text, or None.
    analysis: str | None
    # Per-token probability entries, or None.
    logprobs: list[dict[str, Any]] | None
10
+
11
+
12
class Str(BaseModel):
    """
    Output model whose ``result`` is a single string.
    """

    # `example` is supplied through json_schema_extra: passing it as a bare
    # extra keyword to Field is deprecated in pydantic v2.
    result: str = Field(
        ...,
        description="The output string",
        json_schema_extra={"example": "text"},
    )
14
+
15
+
16
class Bool(BaseModel):
    """
    Output model whose ``result`` is a single boolean.
    """

    # `example` is supplied through json_schema_extra: passing it as a bare
    # extra keyword to Field is deprecated in pydantic v2.
    result: bool = Field(
        ...,
        description="Boolean indicating the output state",
        json_schema_extra={"example": True},
    )
20
+
21
+
22
class ListStr(BaseModel):
    """
    Output model whose ``result`` is a list of strings.
    """

    # `example` is supplied through json_schema_extra: passing it as a bare
    # extra keyword to Field is deprecated in pydantic v2.
    result: list[str] = Field(
        ...,
        description="The output list of strings",
        json_schema_extra={"example": ["text_1", "text_2"]},
    )
26
+
27
+
28
class ListDictStrStr(BaseModel):
    """
    Output model whose ``result`` is a list of string-to-string dictionaries.
    """

    # `example` is supplied through json_schema_extra: passing it as a bare
    # extra keyword to Field is deprecated in pydantic v2.
    result: list[dict[str, str]] = Field(
        ...,
        description="List of dictionaries containing string key-value pairs",
        json_schema_extra={
            "example": [
                {"text": "Mohammad", "type": "PER"},
                {"text": "Iran", "type": "LOC"},
            ]
        },
    )
34
+
35
+
36
class ReasonListStr(BaseModel):
    """
    Output model pairing a free-text ``reason`` with a list-of-strings
    ``result``.
    """

    reason: str = Field(..., description="Thinking process that led to the output")
    # `example` is supplied through json_schema_extra: passing it as a bare
    # extra keyword to Field is deprecated in pydantic v2.
    result: list[str] = Field(
        ...,
        description="The output list of strings",
        json_schema_extra={"example": ["text_1", "text_2"]},
    )
41
+
42
+
43
# This function is needed to create CategorizerOutput with dynamic categories
def create_dynamic_model(allowed_values: list[str]) -> Type[BaseModel]:
    """
    Builds a ``CategorizerOutput`` model restricted to ``allowed_values``.

    Args:
        allowed_values: The category labels that ``result`` may take.

    Returns:
        A new pydantic model class with a required ``reason`` string and a
        required ``result`` field typed as a ``Literal`` of the given labels.
    """
    # Subscripting with a tuple is equivalent to Literal[*allowed_values] but
    # does not need the star-in-subscript syntax added in Python 3.11.
    literal_type = Literal[tuple(allowed_values)]

    CategorizerOutput = create_model(
        "CategorizerOutput",
        reason=(
            str,
            Field(
                ..., description="Explanation of why the input belongs to the category"
            ),
        ),
        result=(literal_type, Field(..., description="Predicted category label")),
    )

    return CategorizerOutput
@@ -1,18 +1,12 @@
1
- from typing import TypeVar, Type, Any
2
1
  from collections.abc import Callable
2
+ from typing import Any, Type, TypeVar
3
3
 
4
4
  from openai import AsyncOpenAI
5
5
  from pydantic import BaseModel
6
6
 
7
- from texttools.internals.models import OperatorOutput
8
- from texttools.internals.operator_utils import OperatorUtils
9
- from texttools.internals.prompt_loader import PromptLoader
10
- from texttools.internals.exceptions import (
11
- TextToolsError,
12
- LLMError,
13
- ValidationError,
14
- PromptError,
15
- )
7
+ from ..engine import OperatorUtils, PromptLoader
8
+ from ..exceptions import LLMError, PromptError, TextToolsError, ValidationError
9
+ from ..internal_models import OperatorOutput
16
10
 
17
11
  # Base Model type for output models
18
12
  T = TypeVar("T", bound=BaseModel)
@@ -1,18 +1,12 @@
1
- from typing import TypeVar, Type, Any
2
1
  from collections.abc import Callable
2
+ from typing import Any, Type, TypeVar
3
3
 
4
4
  from openai import OpenAI
5
5
  from pydantic import BaseModel
6
6
 
7
- from texttools.internals.models import OperatorOutput
8
- from texttools.internals.operator_utils import OperatorUtils
9
- from texttools.internals.prompt_loader import PromptLoader
10
- from texttools.internals.exceptions import (
11
- TextToolsError,
12
- LLMError,
13
- ValidationError,
14
- PromptError,
15
- )
7
+ from ..engine import OperatorUtils, PromptLoader
8
+ from ..exceptions import LLMError, PromptError, TextToolsError, ValidationError
9
+ from ..internal_models import OperatorOutput
16
10
 
17
11
  # Base Model type for output models
18
12
  T = TypeVar("T", bound=BaseModel)