hamtaa-texttools 1.3.1__tar.gz → 1.3.2__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (39)
  1. {hamtaa_texttools-1.3.1 → hamtaa_texttools-1.3.2}/PKG-INFO +8 -11
  2. {hamtaa_texttools-1.3.1 → hamtaa_texttools-1.3.2}/README.md +7 -10
  3. {hamtaa_texttools-1.3.1 → hamtaa_texttools-1.3.2}/hamtaa_texttools.egg-info/PKG-INFO +8 -11
  4. {hamtaa_texttools-1.3.1 → hamtaa_texttools-1.3.2}/hamtaa_texttools.egg-info/SOURCES.txt +1 -0
  5. {hamtaa_texttools-1.3.1 → hamtaa_texttools-1.3.2}/pyproject.toml +1 -1
  6. {hamtaa_texttools-1.3.1 → hamtaa_texttools-1.3.2}/texttools/core/engine.py +21 -23
  7. {hamtaa_texttools-1.3.1 → hamtaa_texttools-1.3.2}/texttools/core/internal_models.py +7 -3
  8. {hamtaa_texttools-1.3.1 → hamtaa_texttools-1.3.2}/texttools/core/operators/async_operator.py +1 -3
  9. {hamtaa_texttools-1.3.1 → hamtaa_texttools-1.3.2}/texttools/core/operators/sync_operator.py +1 -3
  10. hamtaa_texttools-1.3.2/texttools/tools/__init__.py +0 -0
  11. {hamtaa_texttools-1.3.1 → hamtaa_texttools-1.3.2}/LICENSE +0 -0
  12. {hamtaa_texttools-1.3.1 → hamtaa_texttools-1.3.2}/hamtaa_texttools.egg-info/dependency_links.txt +0 -0
  13. {hamtaa_texttools-1.3.1 → hamtaa_texttools-1.3.2}/hamtaa_texttools.egg-info/requires.txt +0 -0
  14. {hamtaa_texttools-1.3.1 → hamtaa_texttools-1.3.2}/hamtaa_texttools.egg-info/top_level.txt +0 -0
  15. {hamtaa_texttools-1.3.1 → hamtaa_texttools-1.3.2}/setup.cfg +0 -0
  16. {hamtaa_texttools-1.3.1 → hamtaa_texttools-1.3.2}/tests/test_all_async_tools.py +0 -0
  17. {hamtaa_texttools-1.3.1 → hamtaa_texttools-1.3.2}/tests/test_all_tools.py +0 -0
  18. {hamtaa_texttools-1.3.1 → hamtaa_texttools-1.3.2}/tests/test_output_validation.py +0 -0
  19. {hamtaa_texttools-1.3.1 → hamtaa_texttools-1.3.2}/texttools/__init__.py +0 -0
  20. {hamtaa_texttools-1.3.1 → hamtaa_texttools-1.3.2}/texttools/core/__init__.py +0 -0
  21. {hamtaa_texttools-1.3.1 → hamtaa_texttools-1.3.2}/texttools/core/exceptions.py +0 -0
  22. {hamtaa_texttools-1.3.1/texttools/tools → hamtaa_texttools-1.3.2/texttools/core/operators}/__init__.py +0 -0
  23. {hamtaa_texttools-1.3.1 → hamtaa_texttools-1.3.2}/texttools/models.py +0 -0
  24. {hamtaa_texttools-1.3.1 → hamtaa_texttools-1.3.2}/texttools/prompts/categorize.yaml +0 -0
  25. {hamtaa_texttools-1.3.1 → hamtaa_texttools-1.3.2}/texttools/prompts/check_fact.yaml +0 -0
  26. {hamtaa_texttools-1.3.1 → hamtaa_texttools-1.3.2}/texttools/prompts/extract_entities.yaml +0 -0
  27. {hamtaa_texttools-1.3.1 → hamtaa_texttools-1.3.2}/texttools/prompts/extract_keywords.yaml +0 -0
  28. {hamtaa_texttools-1.3.1 → hamtaa_texttools-1.3.2}/texttools/prompts/is_question.yaml +0 -0
  29. {hamtaa_texttools-1.3.1 → hamtaa_texttools-1.3.2}/texttools/prompts/merge_questions.yaml +0 -0
  30. {hamtaa_texttools-1.3.1 → hamtaa_texttools-1.3.2}/texttools/prompts/propositionize.yaml +0 -0
  31. {hamtaa_texttools-1.3.1 → hamtaa_texttools-1.3.2}/texttools/prompts/rewrite.yaml +0 -0
  32. {hamtaa_texttools-1.3.1 → hamtaa_texttools-1.3.2}/texttools/prompts/run_custom.yaml +0 -0
  33. {hamtaa_texttools-1.3.1 → hamtaa_texttools-1.3.2}/texttools/prompts/subject_to_question.yaml +0 -0
  34. {hamtaa_texttools-1.3.1 → hamtaa_texttools-1.3.2}/texttools/prompts/summarize.yaml +0 -0
  35. {hamtaa_texttools-1.3.1 → hamtaa_texttools-1.3.2}/texttools/prompts/text_to_question.yaml +0 -0
  36. {hamtaa_texttools-1.3.1 → hamtaa_texttools-1.3.2}/texttools/prompts/translate.yaml +0 -0
  37. {hamtaa_texttools-1.3.1 → hamtaa_texttools-1.3.2}/texttools/py.typed +0 -0
  38. {hamtaa_texttools-1.3.1 → hamtaa_texttools-1.3.2}/texttools/tools/async_tools.py +0 -0
  39. {hamtaa_texttools-1.3.1 → hamtaa_texttools-1.3.2}/texttools/tools/sync_tools.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: hamtaa-texttools
3
- Version: 1.3.1
3
+ Version: 1.3.2
4
4
  Summary: A high-level NLP toolkit built on top of modern LLMs.
5
5
  Author-email: Tohidi <the.mohammad.tohidi@gmail.com>, Erfan Moosavi <erfanmoosavi84@gmail.com>, Montazer <montazerh82@gmail.com>, Givechi <mohamad.m.givechi@gmail.com>, Zareshahi <a.zareshahi1377@gmail.com>
6
6
  Maintainer-email: Erfan Moosavi <erfanmoosavi84@gmail.com>, Tohidi <the.mohammad.tohidi@gmail.com>
@@ -21,6 +21,9 @@ Dynamic: license-file
21
21
 
22
22
  # TextTools
23
23
 
24
+ ![PyPI](https://img.shields.io/pypi/v/hamtaa-texttools)
25
+ ![License](https://img.shields.io/pypi/l/hamtaa-texttools)
26
+
24
27
  ## 📌 Overview
25
28
 
26
29
  **TextTools** is a high-level **NLP toolkit** built on top of **LLMs**.
@@ -44,11 +47,11 @@ Each tool is designed to work with structured outputs.
44
47
  - **`is_question()`** - Binary question detection
45
48
  - **`text_to_question()`** - Generates questions from text
46
49
  - **`merge_questions()`** - Merges multiple questions into one
47
- - **`rewrite()`** - Rewrites text in a diffrent way
48
- - **`subject_to_question()`** - Generates questions about a specific subject
50
+ - **`rewrite()`** - Rewrites text in a different way
51
+ - **`subject_to_question()`** - Generates questions about a given subject
49
52
  - **`summarize()`** - Text summarization
50
53
  - **`translate()`** - Text translation
51
- - **`propositionize()`** - Convert text to atomic independence meaningful sentences
54
+ - **`propositionize()`** - Convert text to atomic independent meaningful sentences
52
55
  - **`check_fact()`** - Check whether a statement is relevant to the source text
53
56
  - **`run_custom()`** - Allows users to define a custom tool with an arbitrary BaseModel
54
57
 
@@ -66,7 +69,7 @@ pip install -U hamtaa-texttools
66
69
 
67
70
  ## 📊 Tool Quality Tiers
68
71
 
69
- | Status | Meaning | Tools | Use in Production? |
72
+ | Status | Meaning | Tools | Safe for Production? |
70
73
  |--------|---------|----------|-------------------|
71
74
  | **✅ Production** | Evaluated, tested, stable. | `categorize()` (list mode), `extract_keywords()`, `extract_entities()`, `is_question()`, `text_to_question()`, `merge_questions()`, `rewrite()`, `subject_to_question()`, `summarize()`, `run_custom()` | **Yes** - ready for reliable use. |
72
75
  | **🧪 Experimental** | Added to the package but **not fully evaluated**. Functional, but quality may vary. | `categorize()` (tree mode), `translate()`, `propositionize()`, `check_fact()` | **Use with caution** - outputs not yet validated. |
@@ -181,9 +184,3 @@ Use **TextTools** when you need to:
181
184
 
182
185
  Contributions are welcome!
183
186
  Feel free to **open issues, suggest new features, or submit pull requests**.
184
-
185
- ---
186
-
187
- ## 🌿 License
188
-
189
- This project is licensed under the MIT License - see the [LICENSE](LICENSE) file for details.
@@ -1,5 +1,8 @@
1
1
  # TextTools
2
2
 
3
+ ![PyPI](https://img.shields.io/pypi/v/hamtaa-texttools)
4
+ ![License](https://img.shields.io/pypi/l/hamtaa-texttools)
5
+
3
6
  ## 📌 Overview
4
7
 
5
8
  **TextTools** is a high-level **NLP toolkit** built on top of **LLMs**.
@@ -23,11 +26,11 @@ Each tool is designed to work with structured outputs.
23
26
  - **`is_question()`** - Binary question detection
24
27
  - **`text_to_question()`** - Generates questions from text
25
28
  - **`merge_questions()`** - Merges multiple questions into one
26
- - **`rewrite()`** - Rewrites text in a diffrent way
27
- - **`subject_to_question()`** - Generates questions about a specific subject
29
+ - **`rewrite()`** - Rewrites text in a different way
30
+ - **`subject_to_question()`** - Generates questions about a given subject
28
31
  - **`summarize()`** - Text summarization
29
32
  - **`translate()`** - Text translation
30
- - **`propositionize()`** - Convert text to atomic independence meaningful sentences
33
+ - **`propositionize()`** - Convert text to atomic independent meaningful sentences
31
34
  - **`check_fact()`** - Check whether a statement is relevant to the source text
32
35
  - **`run_custom()`** - Allows users to define a custom tool with an arbitrary BaseModel
33
36
 
@@ -45,7 +48,7 @@ pip install -U hamtaa-texttools
45
48
 
46
49
  ## 📊 Tool Quality Tiers
47
50
 
48
- | Status | Meaning | Tools | Use in Production? |
51
+ | Status | Meaning | Tools | Safe for Production? |
49
52
  |--------|---------|----------|-------------------|
50
53
  | **✅ Production** | Evaluated, tested, stable. | `categorize()` (list mode), `extract_keywords()`, `extract_entities()`, `is_question()`, `text_to_question()`, `merge_questions()`, `rewrite()`, `subject_to_question()`, `summarize()`, `run_custom()` | **Yes** - ready for reliable use. |
51
54
  | **🧪 Experimental** | Added to the package but **not fully evaluated**. Functional, but quality may vary. | `categorize()` (tree mode), `translate()`, `propositionize()`, `check_fact()` | **Use with caution** - outputs not yet validated. |
@@ -160,9 +163,3 @@ Use **TextTools** when you need to:
160
163
 
161
164
  Contributions are welcome!
162
165
  Feel free to **open issues, suggest new features, or submit pull requests**.
163
-
164
- ---
165
-
166
- ## 🌿 License
167
-
168
- This project is licensed under the MIT License - see the [LICENSE](LICENSE) file for details.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: hamtaa-texttools
3
- Version: 1.3.1
3
+ Version: 1.3.2
4
4
  Summary: A high-level NLP toolkit built on top of modern LLMs.
5
5
  Author-email: Tohidi <the.mohammad.tohidi@gmail.com>, Erfan Moosavi <erfanmoosavi84@gmail.com>, Montazer <montazerh82@gmail.com>, Givechi <mohamad.m.givechi@gmail.com>, Zareshahi <a.zareshahi1377@gmail.com>
6
6
  Maintainer-email: Erfan Moosavi <erfanmoosavi84@gmail.com>, Tohidi <the.mohammad.tohidi@gmail.com>
@@ -21,6 +21,9 @@ Dynamic: license-file
21
21
 
22
22
  # TextTools
23
23
 
24
+ ![PyPI](https://img.shields.io/pypi/v/hamtaa-texttools)
25
+ ![License](https://img.shields.io/pypi/l/hamtaa-texttools)
26
+
24
27
  ## 📌 Overview
25
28
 
26
29
  **TextTools** is a high-level **NLP toolkit** built on top of **LLMs**.
@@ -44,11 +47,11 @@ Each tool is designed to work with structured outputs.
44
47
  - **`is_question()`** - Binary question detection
45
48
  - **`text_to_question()`** - Generates questions from text
46
49
  - **`merge_questions()`** - Merges multiple questions into one
47
- - **`rewrite()`** - Rewrites text in a diffrent way
48
- - **`subject_to_question()`** - Generates questions about a specific subject
50
+ - **`rewrite()`** - Rewrites text in a different way
51
+ - **`subject_to_question()`** - Generates questions about a given subject
49
52
  - **`summarize()`** - Text summarization
50
53
  - **`translate()`** - Text translation
51
- - **`propositionize()`** - Convert text to atomic independence meaningful sentences
54
+ - **`propositionize()`** - Convert text to atomic independent meaningful sentences
52
55
  - **`check_fact()`** - Check whether a statement is relevant to the source text
53
56
  - **`run_custom()`** - Allows users to define a custom tool with an arbitrary BaseModel
54
57
 
@@ -66,7 +69,7 @@ pip install -U hamtaa-texttools
66
69
 
67
70
  ## 📊 Tool Quality Tiers
68
71
 
69
- | Status | Meaning | Tools | Use in Production? |
72
+ | Status | Meaning | Tools | Safe for Production? |
70
73
  |--------|---------|----------|-------------------|
71
74
  | **✅ Production** | Evaluated, tested, stable. | `categorize()` (list mode), `extract_keywords()`, `extract_entities()`, `is_question()`, `text_to_question()`, `merge_questions()`, `rewrite()`, `subject_to_question()`, `summarize()`, `run_custom()` | **Yes** - ready for reliable use. |
72
75
  | **🧪 Experimental** | Added to the package but **not fully evaluated**. Functional, but quality may vary. | `categorize()` (tree mode), `translate()`, `propositionize()`, `check_fact()` | **Use with caution** - outputs not yet validated. |
@@ -181,9 +184,3 @@ Use **TextTools** when you need to:
181
184
 
182
185
  Contributions are welcome!
183
186
  Feel free to **open issues, suggest new features, or submit pull requests**.
184
-
185
- ---
186
-
187
- ## 🌿 License
188
-
189
- This project is licensed under the MIT License - see the [LICENSE](LICENSE) file for details.
@@ -16,6 +16,7 @@ texttools/core/__init__.py
16
16
  texttools/core/engine.py
17
17
  texttools/core/exceptions.py
18
18
  texttools/core/internal_models.py
19
+ texttools/core/operators/__init__.py
19
20
  texttools/core/operators/async_operator.py
20
21
  texttools/core/operators/sync_operator.py
21
22
  texttools/prompts/categorize.yaml
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "hamtaa-texttools"
7
- version = "1.3.1"
7
+ version = "1.3.2"
8
8
  authors = [
9
9
  {name = "Tohidi", email = "the.mohammad.tohidi@gmail.com"},
10
10
  {name = "Erfan Moosavi", email = "erfanmoosavi84@gmail.com"},
@@ -4,6 +4,7 @@ import random
4
4
  import re
5
5
  from functools import lru_cache
6
6
  from pathlib import Path
7
+ from typing import Any
7
8
 
8
9
  import yaml
9
10
 
@@ -20,9 +21,6 @@ class PromptLoader:
20
21
 
21
22
  @lru_cache(maxsize=32)
22
23
  def _load_templates(self, prompt_file: str, mode: str | None) -> dict[str, str]:
23
- """
24
- Loads prompt templates from YAML file with optional mode selection.
25
- """
26
24
  try:
27
25
  base_dir = Path(__file__).parent.parent / Path("prompts")
28
26
  prompt_path = base_dir / prompt_file
@@ -73,13 +71,12 @@ class PromptLoader:
73
71
  self, prompt_file: str, text: str, mode: str, **extra_kwargs
74
72
  ) -> dict[str, str]:
75
73
  try:
76
- template_configs = self._load_templates(prompt_file, mode)
77
74
  format_args = {"text": text}
78
75
  format_args.update(extra_kwargs)
79
76
 
80
- # Inject variables inside each template
81
- for key in template_configs.keys():
82
- template_configs[key] = template_configs[key].format(**format_args)
77
+ template_configs = self._load_templates(prompt_file, mode)
78
+ for key, value in template_configs.items():
79
+ template_configs[key] = value.format(**format_args)
83
80
 
84
81
  return template_configs
85
82
 
@@ -97,30 +94,27 @@ class OperatorUtils:
97
94
  output_lang: str | None,
98
95
  user_prompt: str | None,
99
96
  ) -> str:
100
- main_prompt = ""
97
+ parts = []
101
98
 
102
99
  if analysis:
103
- main_prompt += f"Based on this analysis:\n{analysis}\n"
104
-
100
+ parts.append(f"Based on this analysis: {analysis}")
105
101
  if output_lang:
106
- main_prompt += f"Respond only in the {output_lang} language.\n"
107
-
102
+ parts.append(f"Respond only in the {output_lang} language.")
108
103
  if user_prompt:
109
- main_prompt += f"Consider this instruction {user_prompt}\n"
104
+ parts.append(f"Consider this instruction: {user_prompt}")
110
105
 
111
- main_prompt += main_template
112
-
113
- return main_prompt
106
+ parts.append(main_template)
107
+ return "\n".join(parts)
114
108
 
115
109
  @staticmethod
116
110
  def build_message(prompt: str) -> list[dict[str, str]]:
117
111
  return [{"role": "user", "content": prompt}]
118
112
 
119
113
  @staticmethod
120
- def extract_logprobs(completion: dict) -> list[dict]:
114
+ def extract_logprobs(completion: Any) -> list[dict]:
121
115
  """
122
- Extracts and filters token probabilities from completion logprobs.
123
- Skips punctuation and structural tokens, returns cleaned probability data.
116
+ Extracts and filters logprobs from completion.
117
+ Skips punctuation and structural tokens.
124
118
  """
125
119
  logprobs_data = []
126
120
 
@@ -153,16 +147,17 @@ class OperatorUtils:
153
147
 
154
148
  @staticmethod
155
149
  def get_retry_temp(base_temp: float) -> float:
156
- delta_temp = random.choice([-1, 1]) * random.uniform(0.1, 0.9)
157
- new_temp = base_temp + delta_temp
158
-
150
+ new_temp = base_temp + random.choice([-1, 1]) * random.uniform(0.1, 0.9)
159
151
  return max(0.0, min(new_temp, 1.5))
160
152
 
161
153
 
162
154
  def text_to_chunks(text: str, size: int, overlap: int) -> list[str]:
155
+ """
156
+ Utility for chunking large texts. Used for translation tool
157
+ """
163
158
  separators = ["\n\n", "\n", " ", ""]
164
159
  is_separator_regex = False
165
- keep_separator = True # Equivalent to 'start'
160
+ keep_separator = True
166
161
  length_function = len
167
162
  strip_whitespace = True
168
163
  chunk_size = size
@@ -256,6 +251,9 @@ def text_to_chunks(text: str, size: int, overlap: int) -> list[str]:
256
251
 
257
252
 
258
253
  async def run_with_timeout(coro, timeout: float | None):
254
+ """
255
+ Utility for timeout logic defined in AsyncTheTool
256
+ """
259
257
  if timeout is None:
260
258
  return await coro
261
259
  try:
@@ -21,7 +21,9 @@ class Bool(BaseModel):
21
21
 
22
22
  class ListStr(BaseModel):
23
23
  result: list[str] = Field(
24
- ..., description="The output list of strings", example=["text_1", "text_2"]
24
+ ...,
25
+ description="The output list of strings",
26
+ example=["text_1", "text_2", "text_3"],
25
27
  )
26
28
 
27
29
 
@@ -36,11 +38,13 @@ class ListDictStrStr(BaseModel):
36
38
  class ReasonListStr(BaseModel):
37
39
  reason: str = Field(..., description="Thinking process that led to the output")
38
40
  result: list[str] = Field(
39
- ..., description="The output list of strings", example=["text_1", "text_2"]
41
+ ...,
42
+ description="The output list of strings",
43
+ example=["text_1", "text_2", "text_3"],
40
44
  )
41
45
 
42
46
 
43
- # This function is needed to create CategorizerOutput with dynamic categories
47
+ # Create CategorizerOutput with dynamic categories
44
48
  def create_dynamic_model(allowed_values: list[str]) -> Type[BaseModel]:
45
49
  literal_type = Literal[*allowed_values]
46
50
 
@@ -54,7 +54,7 @@ class AsyncOperator:
54
54
  ) -> tuple[T, Any]:
55
55
  """
56
56
  Parses a chat completion using OpenAI's structured output format.
57
- Returns both the parsed Any and the raw completion for logprobs.
57
+ Returns both the parsed and the completion for logprobs.
58
58
  """
59
59
  try:
60
60
  request_kwargs = {
@@ -92,7 +92,6 @@ class AsyncOperator:
92
92
 
93
93
  async def run(
94
94
  self,
95
- # User parameters
96
95
  text: str,
97
96
  with_analysis: bool,
98
97
  output_lang: str | None,
@@ -103,7 +102,6 @@ class AsyncOperator:
103
102
  validator: Callable[[Any], bool] | None,
104
103
  max_validation_retries: int | None,
105
104
  priority: int | None,
106
- # Internal parameters
107
105
  tool_name: str,
108
106
  output_model: Type[T],
109
107
  mode: str | None,
@@ -54,7 +54,7 @@ class Operator:
54
54
  ) -> tuple[T, Any]:
55
55
  """
56
56
  Parses a chat completion using OpenAI's structured output format.
57
- Returns both the parsed Any and the raw completion for logprobs.
57
+ Returns both the parsed and the completion for logprobs.
58
58
  """
59
59
  try:
60
60
  request_kwargs = {
@@ -90,7 +90,6 @@ class Operator:
90
90
 
91
91
  def run(
92
92
  self,
93
- # User parameters
94
93
  text: str,
95
94
  with_analysis: bool,
96
95
  output_lang: str | None,
@@ -101,7 +100,6 @@ class Operator:
101
100
  validator: Callable[[Any], bool] | None,
102
101
  max_validation_retries: int | None,
103
102
  priority: int | None,
104
- # Internal parameters
105
103
  tool_name: str,
106
104
  output_model: Type[T],
107
105
  mode: str | None,
File without changes