hamtaa-texttools 1.3.2__py3-none-any.whl → 2.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,31 +0,0 @@
1
- hamtaa_texttools-1.3.2.dist-info/licenses/LICENSE,sha256=Hb2YOBKy2MJQLnyLrX37B4ZVuac8eaIcE71SvVIMOLg,1082
2
- texttools/__init__.py,sha256=RK1GAU6pq2lGwFtHdrCX5JkPRHmOLGcmGH67hd_7VAQ,175
3
- texttools/models.py,sha256=5eT2cSrFq8Xa38kANznV7gbi7lwB2PoDxciLKTpsd6c,2516
4
- texttools/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
5
- texttools/core/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
6
- texttools/core/engine.py,sha256=AjifrcJl6PeRu1W6nu9zcxySn-1439Ef2La4d7GpNKY,9481
7
- texttools/core/exceptions.py,sha256=6SDjUL1rmd3ngzD3ytF4LyTRj3bQMSFR9ECrLoqXXHw,395
8
- texttools/core/internal_models.py,sha256=J1qGEO8V0OoX6_-1yxbSmZSR79tJF0ExAIG1QuvH0L0,1734
9
- texttools/core/operators/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
10
- texttools/core/operators/async_operator.py,sha256=-72YQEGFkbk2uYW6PHkLT4wGxhj2p6Uqy3sJtVa9-rk,6386
11
- texttools/core/operators/sync_operator.py,sha256=mfXtEOlIAhHo4SHaHRKjGb0Z1T894clv-toUzUcbfpo,6291
12
- texttools/prompts/categorize.yaml,sha256=42Rp3SgVHaDLKrJ27_uK788LiQud0pOXJthz4r0a40Y,1214
13
- texttools/prompts/check_fact.yaml,sha256=zWFQDRhEE1ij9wSeeenS9YSTM-bY5zzUaG390zUgmcs,714
14
- texttools/prompts/extract_entities.yaml,sha256=_zYKHNJDIzVDI_-TnwFCKyMs-XLM5igvmWhvSTc3INQ,637
15
- texttools/prompts/extract_keywords.yaml,sha256=1o4u3uwzapNtB1BUpNIRL5qtrwjW0Yhvyq0TZJiafdg,3272
16
- texttools/prompts/is_question.yaml,sha256=jnPARd2ZiulLzHW_r4WAsz3sOryfz6Gy5-yYXp-2hd0,496
17
- texttools/prompts/merge_questions.yaml,sha256=l9Q2OEjPp3SDkxbq3zZCj2ZmXacWSnmYMpUr3l6r5yE,1816
18
- texttools/prompts/propositionize.yaml,sha256=nbGAfbm1-2Hoc0JLtqZi-S7VHQfnMmuTKI7dZeBxQW0,1403
19
- texttools/prompts/rewrite.yaml,sha256=klEm8MqXK-Bo8RsS5R9KLMT0zlD-BKo_G6tz9lpAcEY,5420
20
- texttools/prompts/run_custom.yaml,sha256=IETY9H0wPGWIIzcnupfbwwKQblwZrbYAxB754W9MhgU,125
21
- texttools/prompts/subject_to_question.yaml,sha256=AK16pZW9HUppIF8JBSEenbUNOU3aqeVV781_WUXnLqk,1160
22
- texttools/prompts/summarize.yaml,sha256=rPh060Bx_yI1W2JNg-nr83LUk9itatYLKM8ciH2pOvg,486
23
- texttools/prompts/text_to_question.yaml,sha256=pUwPgK9l5f8S4E5fCht9JY7PFVK2aY1InPfASr7R5o4,1017
24
- texttools/prompts/translate.yaml,sha256=Dd5bs3O8SI-FlVSwHMYGeEjMmdOWeRlcfBHkhixCx7c,665
25
- texttools/tools/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
26
- texttools/tools/async_tools.py,sha256=2suwx8N0aRnowaSOpV6C57AqPlmQe5Z0Yx4E5QIMkmU,46939
27
- texttools/tools/sync_tools.py,sha256=mEuL-nlbxVW30dPE3hGkAUnYXbul-3gN2Le4CMVFCgU,42528
28
- hamtaa_texttools-1.3.2.dist-info/METADATA,sha256=LjhXLwovneW5Ii1DvAYhFT4JR64ar23UyptCvCO6Hpc,7448
29
- hamtaa_texttools-1.3.2.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
30
- hamtaa_texttools-1.3.2.dist-info/top_level.txt,sha256=5Mh0jIxxZ5rOXHGJ6Mp-JPKviywwN0MYuH0xk5bEWqE,10
31
- hamtaa_texttools-1.3.2.dist-info/RECORD,,
texttools/core/engine.py DELETED
@@ -1,262 +0,0 @@
1
- import asyncio
2
- import math
3
- import random
4
- import re
5
- from functools import lru_cache
6
- from pathlib import Path
7
- from typing import Any
8
-
9
- import yaml
10
-
11
- from .exceptions import PromptError
12
-
13
-
14
- class PromptLoader:
15
- """
16
- Utility for loading and formatting YAML prompt templates.
17
- """
18
-
19
- MAIN_TEMPLATE = "main_template"
20
- ANALYZE_TEMPLATE = "analyze_template"
21
-
22
- @lru_cache(maxsize=32)
23
- def _load_templates(self, prompt_file: str, mode: str | None) -> dict[str, str]:
24
- try:
25
- base_dir = Path(__file__).parent.parent / Path("prompts")
26
- prompt_path = base_dir / prompt_file
27
-
28
- if not prompt_path.exists():
29
- raise PromptError(f"Prompt file not found: {prompt_file}")
30
-
31
- data = yaml.safe_load(prompt_path.read_text(encoding="utf-8"))
32
-
33
- if self.MAIN_TEMPLATE not in data:
34
- raise PromptError(f"Missing 'main_template' in {prompt_file}")
35
-
36
- if self.ANALYZE_TEMPLATE not in data:
37
- raise PromptError(f"Missing 'analyze_template' in {prompt_file}")
38
-
39
- if mode and mode not in data.get(self.MAIN_TEMPLATE, {}):
40
- raise PromptError(f"Mode '{mode}' not found in {prompt_file}")
41
-
42
- main_template = (
43
- data[self.MAIN_TEMPLATE][mode]
44
- if mode and isinstance(data[self.MAIN_TEMPLATE], dict)
45
- else data[self.MAIN_TEMPLATE]
46
- )
47
-
48
- analyze_template = (
49
- data[self.ANALYZE_TEMPLATE][mode]
50
- if mode and isinstance(data[self.ANALYZE_TEMPLATE], dict)
51
- else data[self.ANALYZE_TEMPLATE]
52
- )
53
-
54
- if not main_template or not main_template.strip():
55
- raise PromptError(
56
- f"Empty main_template in {prompt_file}"
57
- + (f" for mode '{mode}'" if mode else "")
58
- )
59
-
60
- return {
61
- self.MAIN_TEMPLATE: main_template,
62
- self.ANALYZE_TEMPLATE: analyze_template,
63
- }
64
-
65
- except yaml.YAMLError as e:
66
- raise PromptError(f"Invalid YAML in {prompt_file}: {e}")
67
- except Exception as e:
68
- raise PromptError(f"Failed to load prompt {prompt_file}: {e}")
69
-
70
- def load(
71
- self, prompt_file: str, text: str, mode: str, **extra_kwargs
72
- ) -> dict[str, str]:
73
- try:
74
- format_args = {"text": text}
75
- format_args.update(extra_kwargs)
76
-
77
- template_configs = self._load_templates(prompt_file, mode)
78
- for key, value in template_configs.items():
79
- template_configs[key] = value.format(**format_args)
80
-
81
- return template_configs
82
-
83
- except KeyError as e:
84
- raise PromptError(f"Missing template variable: {e}")
85
- except Exception as e:
86
- raise PromptError(f"Failed to format prompt: {e}")
87
-
88
-
89
- class OperatorUtils:
90
- @staticmethod
91
- def build_main_prompt(
92
- main_template: str,
93
- analysis: str | None,
94
- output_lang: str | None,
95
- user_prompt: str | None,
96
- ) -> str:
97
- parts = []
98
-
99
- if analysis:
100
- parts.append(f"Based on this analysis: {analysis}")
101
- if output_lang:
102
- parts.append(f"Respond only in the {output_lang} language.")
103
- if user_prompt:
104
- parts.append(f"Consider this instruction: {user_prompt}")
105
-
106
- parts.append(main_template)
107
- return "\n".join(parts)
108
-
109
- @staticmethod
110
- def build_message(prompt: str) -> list[dict[str, str]]:
111
- return [{"role": "user", "content": prompt}]
112
-
113
- @staticmethod
114
- def extract_logprobs(completion: Any) -> list[dict]:
115
- """
116
- Extracts and filters logprobs from completion.
117
- Skips punctuation and structural tokens.
118
- """
119
- logprobs_data = []
120
-
121
- ignore_pattern = re.compile(r'^(result|[\s\[\]\{\}",:]+)$')
122
-
123
- for choice in completion.choices:
124
- if not getattr(choice, "logprobs", None):
125
- raise ValueError("Your model does not support logprobs")
126
-
127
- for logprob_item in choice.logprobs.content:
128
- if ignore_pattern.match(logprob_item.token):
129
- continue
130
- token_entry = {
131
- "token": logprob_item.token,
132
- "prob": round(math.exp(logprob_item.logprob), 8),
133
- "top_alternatives": [],
134
- }
135
- for alt in logprob_item.top_logprobs:
136
- if ignore_pattern.match(alt.token):
137
- continue
138
- token_entry["top_alternatives"].append(
139
- {
140
- "token": alt.token,
141
- "prob": round(math.exp(alt.logprob), 8),
142
- }
143
- )
144
- logprobs_data.append(token_entry)
145
-
146
- return logprobs_data
147
-
148
- @staticmethod
149
- def get_retry_temp(base_temp: float) -> float:
150
- new_temp = base_temp + random.choice([-1, 1]) * random.uniform(0.1, 0.9)
151
- return max(0.0, min(new_temp, 1.5))
152
-
153
-
154
def text_to_chunks(text: str, size: int, overlap: int) -> list[str]:
    """
    Utility for chunking large texts. Used for translation tool

    The text is split recursively on the first separator it contains, trying
    blank line, newline, then space. Each separator stays attached to the
    front of the piece that follows it. Pieces shorter than ``size`` characters
    are merged into whitespace-stripped chunks of at most ``size`` characters;
    consecutive chunks may share trailing pieces totalling at most ``overlap``
    characters. A piece that is still too long once no separator is left is
    emitted unchanged.

    Args:
        text: The text to split.
        size: Target maximum chunk length, measured with ``len``.
        overlap: Maximum number of characters carried over between chunks.

    Returns:
        The chunks in document order; chunks that are empty after stripping
        are dropped.
    """
    separators = ["\n\n", "\n", " ", ""]

    def split_keeping_separator(segment: str, separator: str) -> list[str]:
        # The empty separator means "do not split any further".
        if not separator:
            return [segment]
        # A single capture group makes re.split alternate text / separator,
        # starting and ending with text.
        parts = re.split(f"({re.escape(separator)})", segment)
        head = parts[0]
        pieces = [sep + body for sep, body in zip(parts[1::2], parts[2::2])]
        return [head, *pieces] if head else pieces

    def merge_pieces(pieces: list[str]) -> list[str]:
        chunks = []
        window = []  # pieces making up the chunk currently being built
        window_len = 0
        for piece in pieces:
            piece_len = len(piece)
            if window_len + piece_len > size and window:
                finished = "".join(window).strip()
                if finished:
                    chunks.append(finished)
                # Drop leading pieces until what is left fits within the
                # overlap budget and leaves room for the incoming piece.
                while window_len > overlap or (
                    window_len + piece_len > size and window_len > 0
                ):
                    window_len -= len(window[0])
                    window = window[1:]
            window.append(piece)
            window_len += piece_len
        tail = "".join(window).strip()
        if tail:
            chunks.append(tail)
        return chunks

    def split_recursively(segment: str, candidates: list[str]) -> list[str]:
        # Choose the first candidate present in the segment; the empty
        # separator always ends the search.
        chosen = candidates[-1]
        remaining = []
        for index, candidate in enumerate(candidates):
            if not candidate:
                chosen = candidate
                break
            if candidate in segment:
                chosen = candidate
                remaining = candidates[index + 1 :]
                break

        chunks = []
        pending = []  # consecutive short pieces waiting to be merged
        for piece in split_keeping_separator(segment, chosen):
            if len(piece) < size:
                pending.append(piece)
                continue
            if pending:
                chunks.extend(merge_pieces(pending))
                pending = []
            if remaining:
                chunks.extend(split_recursively(piece, remaining))
            else:
                chunks.append(piece)
        if pending:
            chunks.extend(merge_pieces(pending))
        return chunks

    return split_recursively(text, separators)
251
-
252
-
253
- async def run_with_timeout(coro, timeout: float | None):
254
- """
255
- Utility for timeout logic defined in AsyncTheTool
256
- """
257
- if timeout is None:
258
- return await coro
259
- try:
260
- return await asyncio.wait_for(coro, timeout=timeout)
261
- except asyncio.TimeoutError:
262
- raise TimeoutError(f"Operation exceeded timeout of {timeout} seconds")
@@ -1,26 +0,0 @@
1
- main_template: |
2
- You are a question from subject generator.
3
- Given the following subject, generate {number_of_questions} appropriate questions that this subject would directly respond to.
4
- The generated questions should be independently meaningful,
5
- and they must not contain any pronouns such as this, that, he, or she.
6
-
7
- There is a `reason` key; fill it with a summarized version of your thoughts.
8
- The `reason` must be less than 20 words.
9
- Don't forget to fill the reason.
10
-
11
- Respond only in JSON format:
12
- {{"result": ["question1", "question2", ...], "reason": "string"}}
13
-
14
- Here is the subject:
15
- {text}
16
-
17
- analyze_template: |
18
- Our goal is to generate questions from the given subject.
19
- The questions must be meaningful; some of them should be specific and some should be general.
20
- But first, in this step we want to analyze the subject that I asked to generate questions for it.
21
- We need a summarized analysis of the subject.
22
- What is the subject about?
23
- What points of view can we see and generate questions from? (Questions that real users might have.)
24
-
25
- Here is the subject:
26
- {text}
@@ -1,26 +0,0 @@
1
- main_template: |
2
- You are a question generator.
3
- Given the following answer, generate {number_of_questions} appropriate questions that this answer would directly respond to.
4
- The generated questions should be independently meaningful,
5
- and must not contain any pronouns such as this, that, he, or she.
6
-
7
- There is a `reason` key; fill it with a summarized version of your thoughts.
8
- The `reason` must be less than 20 words.
9
- Don't forget to fill the reason.
10
-
11
- Respond only in JSON format:
12
- {{"result": ["question1", "question2", ...], "reason": "string"}}
13
-
14
- Here is the answer:
15
- {text}
16
-
17
- analyze_template: |
18
- Analyze the following answer to identify its key facts,
19
- main subject, and what kind of information it provides.
20
- Provide a brief, summarized understanding of the answer's content that will
21
- help in formulating relevant and direct questions.
22
- Just mention the key points that were provided in the answer.
23
-
24
- Here is the answer:
25
- {text}
26
-