hamtaa-texttools 1.1.21__py3-none-any.whl → 1.1.23__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,4 +1,4 @@
1
- from typing import TypeVar, Type
1
+ from typing import TypeVar, Type, Any
2
2
  from collections.abc import Callable
3
3
 
4
4
  from openai import OpenAI
@@ -27,17 +27,11 @@ class Operator:
27
27
  self._client = client
28
28
  self._model = model
29
29
 
30
- def _analyze_completion(self, analyze_prompt: str, temperature: float) -> str:
30
+ def _analyze_completion(self, analyze_message: list[dict[str, str]]) -> str:
31
31
  try:
32
- if not analyze_prompt:
33
- raise PromptError("Analyze template is empty")
34
-
35
- analyze_message = OperatorUtils.build_user_message(analyze_prompt)
36
-
37
32
  completion = self._client.chat.completions.create(
38
33
  model=self._model,
39
34
  messages=analyze_message,
40
- temperature=temperature,
41
35
  )
42
36
 
43
37
  if not completion.choices:
@@ -57,20 +51,18 @@ class Operator:
57
51
 
58
52
  def _parse_completion(
59
53
  self,
60
- main_prompt: str,
54
+ main_message: list[dict[str, str]],
61
55
  output_model: Type[T],
62
56
  temperature: float,
63
57
  logprobs: bool,
64
58
  top_logprobs: int,
65
- priority: int,
66
- ) -> tuple[T, object]:
59
+ priority: int | None,
60
+ ) -> tuple[T, Any]:
67
61
  """
68
62
  Parses a chat completion using OpenAI's structured output format.
69
- Returns both the parsed object and the raw completion for logprobs.
63
+ Returns both the parsed object and the raw completion for logprobs.
70
64
  """
71
65
  try:
72
- main_message = OperatorUtils.build_user_message(main_prompt)
73
-
74
66
  request_kwargs = {
75
67
  "model": self._model,
76
68
  "messages": main_message,
@@ -82,7 +74,7 @@ class Operator:
82
74
  request_kwargs["logprobs"] = True
83
75
  request_kwargs["top_logprobs"] = top_logprobs
84
76
 
85
- if priority:
77
+ if priority is not None:
86
78
  request_kwargs["extra_body"] = {"priority": priority}
87
79
 
88
80
  completion = self._client.beta.chat.completions.parse(**request_kwargs)
@@ -112,50 +104,48 @@ class Operator:
112
104
  temperature: float,
113
105
  logprobs: bool,
114
106
  top_logprobs: int,
115
- validator: Callable[[object], bool] | None,
107
+ validator: Callable[[Any], bool] | None,
116
108
  max_validation_retries: int | None,
117
- priority: int,
109
+ priority: int | None,
118
110
  # Internal parameters
119
- prompt_file: str,
111
+ tool_name: str,
120
112
  output_model: Type[T],
121
113
  mode: str | None,
122
114
  **extra_kwargs,
123
115
  ) -> OperatorOutput:
124
116
  """
125
- Execute the LLM pipeline with the given input text. (Sync)
117
+ Execute the LLM pipeline with the given input text.
126
118
  """
127
119
  try:
128
120
  prompt_loader = PromptLoader()
129
-
130
121
  prompt_configs = prompt_loader.load(
131
- prompt_file=prompt_file,
122
+ prompt_file=tool_name + ".yaml",
132
123
  text=text.strip(),
133
124
  mode=mode,
134
125
  **extra_kwargs,
135
126
  )
136
127
 
137
- main_prompt = ""
138
- analysis = ""
128
+ analysis: str | None = None
139
129
 
140
130
  if with_analysis:
141
- analysis = self._analyze_completion(
142
- prompt_configs["analyze_template"], temperature
131
+ analyze_message = OperatorUtils.build_message(
132
+ prompt_configs["analyze_template"]
143
133
  )
144
- main_prompt += f"Based on this analysis:\n{analysis}\n"
145
-
146
- if output_lang:
147
- main_prompt += f"Respond only in the {output_lang} language.\n"
134
+ analysis = self._analyze_completion(analyze_message)
148
135
 
149
- if user_prompt:
150
- main_prompt += f"Consider this instruction {user_prompt}\n"
151
-
152
- main_prompt += prompt_configs["main_template"]
153
-
154
- if logprobs and (not isinstance(top_logprobs, int) or top_logprobs < 2):
155
- raise ValueError("top_logprobs should be an integer greater than 1")
136
+ main_message = OperatorUtils.build_message(
137
+ OperatorUtils.build_main_prompt(
138
+ prompt_configs["main_template"], analysis, output_lang, user_prompt
139
+ )
140
+ )
156
141
 
157
142
  parsed, completion = self._parse_completion(
158
- main_prompt, output_model, temperature, logprobs, top_logprobs, priority
143
+ main_message,
144
+ output_model,
145
+ temperature,
146
+ logprobs,
147
+ top_logprobs,
148
+ priority,
159
149
  )
160
150
 
161
151
  # Retry logic if validation fails
@@ -164,9 +154,7 @@ class Operator:
164
154
  not isinstance(max_validation_retries, int)
165
155
  or max_validation_retries < 1
166
156
  ):
167
- raise ValueError(
168
- "max_validation_retries should be a positive integer"
169
- )
157
+ raise ValueError("max_validation_retries should be a positive int")
170
158
 
171
159
  succeeded = False
172
160
  for _ in range(max_validation_retries):
@@ -175,7 +163,7 @@ class Operator:
175
163
 
176
164
  try:
177
165
  parsed, completion = self._parse_completion(
178
- main_prompt,
166
+ main_message,
179
167
  output_model,
180
168
  retry_temperature,
181
169
  logprobs,
@@ -23,7 +23,7 @@ main_template: |
23
23
  Available categories with their descriptions:
24
24
  {category_list}
25
25
 
26
- The text that has to be categorized:
26
+ Here is the text:
27
27
  {text}
28
28
 
29
29
  analyze_template: |
@@ -31,5 +31,6 @@ analyze_template: |
31
31
  To improve categorization, we need an analysis of the text.
32
32
  Analyze the given text and write its main idea and a short analysis of that.
33
33
  Analysis should be very short.
34
- Text:
34
+
35
+ Here is the text:
35
36
  {text}
@@ -2,10 +2,13 @@ main_template: |
2
2
  You are an expert in determining whether a statement can be concluded from the source text or not.
3
3
  You must return a boolean value: True or False.
4
4
  Return True if the statement can be concluded from the source, and False otherwise.
5
+
5
6
  Respond only in JSON format (Output should be a boolean):
6
7
  {{"result": True/False}}
8
+
7
9
  The statement is:
8
10
  {text}
11
+
9
12
  The source text is:
10
13
  {source_text}
11
14
 
@@ -13,7 +16,9 @@ analyze_template: |
13
16
  You should analyze a statement and a source text and provide a brief,
14
17
  summarized analysis that could help in determining whether the statement
15
18
  can be concluded from the source or not.
19
+
16
20
  The statement is:
17
21
  {text}
22
+
18
23
  The source text is:
19
24
  {source_text}
@@ -2,6 +2,7 @@ main_template: |
2
2
  You are a Named Entity Recognition (NER) extractor.
3
3
  Identify and extract {entities} from the given text.
4
4
  For each entity, provide its text and a clear type.
5
+
5
6
  Respond only in JSON format:
6
7
  {{
7
8
  "result": [
@@ -11,10 +12,13 @@ main_template: |
11
12
  }}
12
13
  ]
13
14
  }}
15
+
14
16
  Here is the text:
15
17
  {text}
16
18
 
17
19
  analyze_template: |
18
20
  Read the following text and identify any proper nouns, key concepts, or specific mentions that might represent named entities.
19
21
  Provide a brief, summarized analysis that could help in categorizing these entities.
22
+
23
+ Here is the text:
20
24
  {text}
@@ -3,14 +3,17 @@ main_template:
3
3
  auto: |
4
4
  You are an expert keyword extractor.
5
5
  Extract the most relevant keywords from the given text.
6
+
6
7
  Guidelines:
7
8
  - Keywords must represent the main concepts of the text.
8
9
  - If two words have overlapping meanings, choose only one.
9
10
  - Do not include generic or unrelated words.
10
11
  - Keywords must be single, self-contained words (no phrases).
11
12
  - Output between 3 and 7 keywords based on the input length.
12
- - Respond only in JSON format:
13
+
14
+ Respond only in JSON format:
13
15
  {{"result": ["keyword1", "keyword2", etc.]}}
16
+
14
17
  Here is the text:
15
18
  {text}
16
19
 
@@ -29,8 +32,10 @@ main_template:
29
32
  - Short texts (a few sentences): 3 keywords
30
33
  - Medium texts (1–4 paragraphs): 4–5 keywords
31
34
  - Long texts (more than 4 paragraphs): 6–7 keywords
32
- - Respond only in JSON format:
35
+
36
+ Respond only in JSON format:
33
37
  {{"result": ["keyword1", "keyword2", etc.]}}
38
+
34
39
  Here is the text:
35
40
  {text}
36
41
 
@@ -45,7 +50,8 @@ main_template:
45
50
  - If the text doesn't contain enough distinct keywords, include the most relevant ones even if some are less specific.
46
51
  - Keywords must be single words (no multi-word expressions).
47
52
  - Order keywords by relevance (most relevant first).
48
- - Respond only in JSON format:
53
+
54
+ Respond only in JSON format:
49
55
  {{"result": ["keyword1", "keyword2", "keyword3", ...]}}
50
56
 
51
57
  Here is the text:
@@ -55,14 +61,20 @@ analyze_template:
55
61
  auto: |
56
62
  Analyze the following text to identify its main topics, concepts, and important terms.
57
63
  Provide a concise summary of your findings that will help in extracting relevant keywords.
64
+
65
+ Here is the text:
58
66
  {text}
59
67
 
60
68
  threshold: |
61
69
  Analyze the following text to identify its main topics, concepts, and important terms.
62
70
  Provide a concise summary of your findings that will help in extracting relevant keywords.
71
+
72
+ Here is the text:
63
73
  {text}
64
74
 
65
75
  count: |
66
76
  Analyze the following text to identify its main topics, concepts, and important terms.
67
77
  Provide a concise summary of your findings that will help in extracting relevant keywords.
78
+
79
+ Here is the text:
68
80
  {text}
@@ -1,8 +1,10 @@
1
1
  main_template: |
2
2
  You are a question detector.
3
3
  Determine whether the given text contains any question or not.
4
+
4
5
  Respond only in JSON format (Output should be a boolean):
5
6
  {{"result": True/False}}
7
+
6
8
  Here is the text:
7
9
  {text}
8
10
 
@@ -10,5 +12,7 @@ analyze_template: |
10
12
  We want to analyze this text snippet to see if it contains any question or request of some kind or not.
11
13
  Read the text, and reason about it being a request or not.
12
14
  Summarized, short answer.
15
+
16
+ Here is the text:
13
17
  {text}
14
18
 
@@ -4,13 +4,16 @@ main_template:
4
4
  You are a language expert.
5
5
  I will give you a list of questions that are semantically similar.
6
6
  Your task is to merge them into one unified question.
7
+
7
8
  Guidelines:
8
9
  - Preserves all the information and intent from the original questions.
9
10
  - Sounds natural, fluent, and concise.
10
11
  - Avoids redundancy or unnecessary repetition.
11
12
  - Does not omit any unique idea from the originals.
12
- - Respond only in JSON format:
13
+
14
+ Respond only in JSON format:
13
15
  {{"result": "string"}}
16
+
14
17
  Here are the questions:
15
18
  {text}
16
19
 
@@ -20,8 +23,10 @@ main_template:
20
23
  Then, write one merged question that combines all their content clearly and naturally, without redundancy.
21
24
  Step 1: Extract key ideas.
22
25
  Step 2: Write the final merged question.
26
+
23
27
  Respond only in JSON format:
24
28
  {{"result": "string"}}
29
+
25
30
  Here are the questions:
26
31
  {text}
27
32
 
@@ -33,6 +38,7 @@ analyze_template:
33
38
  and the specific information they are seeking.
34
39
  Provide a brief, summarized understanding of the questions' meaning that
35
40
  will help in merging and rephrasing it accurately without changing its intent.
41
+
36
42
  Here is the question:
37
43
  {text}
38
44
 
@@ -41,6 +47,7 @@ analyze_template:
41
47
  and the literal meaning it conveys.
42
48
  Provide a brief, summarized analysis of their linguistic structure and current meaning,
43
49
  which will then be used to create a new question containing all of their contents.
50
+
44
51
  Here is the question:
45
52
  {text}
46
53
 
@@ -19,4 +19,6 @@ analyze_template: |
19
19
  An atomic proposition is a single, self-contained fact that is concise,
20
20
  verifiable, and does not rely on external context.
21
21
  You just have to think around the possible propositions in the text and how a proposition can be made.
22
+
23
+ Here is the text:
22
24
  {text}
@@ -52,7 +52,6 @@ main_template:
52
52
  - Make it Challenging: The difference should be subtle enough that it requires a deep understanding of the text to identify, not just a simple keyword mismatch.
53
53
  - Maintain Similar Length: The generated sentence should be of roughly the same length and level of detail as the Anchor.
54
54
 
55
-
56
55
  Respond only in JSON format:
57
56
  {{"result": "str"}}
58
57
 
@@ -73,7 +72,7 @@ analyze_template:
73
72
 
74
73
  Your analysis should capture the ESSENTIAL MEANING that must be preserved in any paraphrase.
75
74
 
76
- Text:
75
+ Here is the text:
77
76
  {text}
78
77
 
79
78
  negative: |
@@ -87,7 +86,7 @@ analyze_template:
87
86
 
88
87
  The goal is to find topics that are in the same domain but semantically unrelated to this specific text.
89
88
 
90
- Text:
89
+ Here is the text:
91
90
  {text}
92
91
 
93
92
  hard_negative: |
@@ -106,6 +105,6 @@ analyze_template:
106
105
  - Sentence structure
107
106
  - 80-90% of the vocabulary
108
107
 
109
- Text:
108
+ Here is the text:
110
109
  {text}
111
110
 
@@ -3,12 +3,15 @@ main_template: |
3
3
  Given the following subject, generate {number_of_questions} appropriate questions that this subject would directly respond to.
4
4
  The generated subject should be independently meaningful,
5
5
  and it must not mention any verbs like, this, that, he or she and etc. in the question.
6
+
6
7
  There is a `reason` key, fill that up with a summarized version of your thoughts.
7
8
  The `reason` must be less than 20 words.
8
9
  Don't forget to fill the reason.
10
+
9
11
  Respond only in JSON format:
10
12
  {{"result": ["question1", "question2", ...], "reason": "string"}}
11
- Here is the text:
13
+
14
+ Here is the subject:
12
15
  {text}
13
16
 
14
17
  analyze_template: |
@@ -18,5 +21,6 @@ analyze_template: |
18
21
  We need a summarized analysis of the subject.
19
22
  What is the subject about?
20
23
  What points of view can we see and generate questions from it? (Questions that real users might have.)
24
+
21
25
  Here is the subject:
22
26
  {text}
@@ -1,8 +1,10 @@
1
1
  main_template: |
2
2
  You are a summarizer.
3
3
  You must summarize the given text, preserving its meaning.
4
+
4
5
  Respond only in JSON format:
5
6
  {{"result": "string"}}
7
+
6
8
  Provide a concise summary of the following text:
7
9
  {text}
8
10
 
@@ -10,5 +12,7 @@ main_template: |
10
12
  analyze_template: |
11
13
  Read the following text and identify its main points, key arguments, and overall purpose.
12
14
  Provide a brief, summarized analysis that will help in generating an accurate and concise summary.
15
+
16
+ Here is the text:
13
17
  {text}
14
18
 
@@ -3,11 +3,14 @@ main_template: |
3
3
  Given the following answer, generate {number_of_questions} appropriate questions that this answer would directly respond to.
4
4
  The generated answer should be independently meaningful,
5
5
  and not mentioning any verbs like, this, that, he or she on the question.
6
+
6
7
  There is a `reason` key, fill that up with a summarized version of your thoughts.
7
8
  The `reason` must be less than 20 words.
8
9
  Don't forget to fill the reason.
10
+
9
11
  Respond only in JSON format:
10
12
  {{"result": ["question1", "question2", ...], "reason": "string"}}
13
+
11
14
  Here is the answer:
12
15
  {text}
13
16
 
@@ -17,6 +20,7 @@ analyze_template: |
17
20
  Provide a brief, summarized understanding of the answer's content that will
18
21
  help in formulating relevant and direct questions.
19
22
  Just mention the key points that were provided in the answer.
23
+
20
24
  Here is the answer:
21
25
  {text}
22
26
 
@@ -1,9 +1,12 @@
1
1
  main_template: |
2
2
  You are a {target_language} translator.
3
3
  Output only the translated text.
4
+
4
5
  Respond only in JSON format:
5
6
  {{"result": "string"}}
7
+
6
8
  Don't translate proper name, only transliterate them to {target_language}
9
+
7
10
  Translate the following text to {target_language}:
8
11
  {text}
9
12
 
@@ -11,5 +14,7 @@ analyze_template: |
11
14
  Analyze the following text and identify important linguistic considerations for translation.
12
15
  Point out any idioms, cultural references, or complex structures that need special attention.
13
16
  Also, list all proper nouns that should not be translated. Write your analysis in the {target_language}.
17
+
18
+ Here is the text:
14
19
  {text}
15
20