hamtaa-texttools 1.1.16__py3-none-any.whl → 1.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (45) hide show
  1. hamtaa_texttools-1.2.0.dist-info/METADATA +212 -0
  2. hamtaa_texttools-1.2.0.dist-info/RECORD +34 -0
  3. texttools/__init__.py +5 -5
  4. texttools/batch/__init__.py +0 -0
  5. texttools/batch/{batch_config.py → config.py} +16 -2
  6. texttools/batch/{internals/batch_manager.py → manager.py} +2 -2
  7. texttools/batch/{batch_runner.py → runner.py} +80 -69
  8. texttools/core/__init__.py +0 -0
  9. texttools/core/engine.py +254 -0
  10. texttools/core/exceptions.py +22 -0
  11. texttools/core/internal_models.py +58 -0
  12. texttools/core/operators/async_operator.py +194 -0
  13. texttools/core/operators/sync_operator.py +192 -0
  14. texttools/models.py +88 -0
  15. texttools/prompts/categorize.yaml +36 -77
  16. texttools/prompts/check_fact.yaml +24 -0
  17. texttools/prompts/extract_entities.yaml +7 -3
  18. texttools/prompts/extract_keywords.yaml +21 -9
  19. texttools/prompts/is_question.yaml +6 -2
  20. texttools/prompts/merge_questions.yaml +12 -5
  21. texttools/prompts/propositionize.yaml +24 -0
  22. texttools/prompts/rewrite.yaml +9 -10
  23. texttools/prompts/run_custom.yaml +2 -2
  24. texttools/prompts/subject_to_question.yaml +7 -3
  25. texttools/prompts/summarize.yaml +6 -2
  26. texttools/prompts/text_to_question.yaml +12 -6
  27. texttools/prompts/translate.yaml +7 -2
  28. texttools/py.typed +0 -0
  29. texttools/tools/__init__.py +0 -0
  30. texttools/tools/async_tools.py +778 -489
  31. texttools/tools/sync_tools.py +775 -487
  32. hamtaa_texttools-1.1.16.dist-info/METADATA +0 -255
  33. hamtaa_texttools-1.1.16.dist-info/RECORD +0 -31
  34. texttools/batch/internals/utils.py +0 -16
  35. texttools/prompts/README.md +0 -35
  36. texttools/prompts/detect_entity.yaml +0 -22
  37. texttools/tools/internals/async_operator.py +0 -200
  38. texttools/tools/internals/formatters.py +0 -24
  39. texttools/tools/internals/models.py +0 -183
  40. texttools/tools/internals/operator_utils.py +0 -54
  41. texttools/tools/internals/prompt_loader.py +0 -56
  42. texttools/tools/internals/sync_operator.py +0 -201
  43. {hamtaa_texttools-1.1.16.dist-info → hamtaa_texttools-1.2.0.dist-info}/WHEEL +0 -0
  44. {hamtaa_texttools-1.1.16.dist-info → hamtaa_texttools-1.2.0.dist-info}/licenses/LICENSE +0 -0
  45. {hamtaa_texttools-1.1.16.dist-info → hamtaa_texttools-1.2.0.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,192 @@
1
+ from collections.abc import Callable
2
+ from typing import Any, Type, TypeVar
3
+
4
+ from openai import OpenAI
5
+ from pydantic import BaseModel
6
+
7
+ from ..engine import OperatorUtils, PromptLoader
8
+ from ..exceptions import LLMError, PromptError, TextToolsError, ValidationError
9
+ from ..internal_models import OperatorOutput
10
+
11
+ # Type variable for structured-output model classes; bound to pydantic's BaseModel
12
+ T = TypeVar("T", bound=BaseModel)
13
+
14
+
15
class Operator:
    """
    Core engine for running text-processing operations with an LLM.

    Holds a synchronous OpenAI client and a model name. `run` is the entry
    point: it loads the prompt for a tool, optionally asks the model for a
    free-text analysis first, requests a structured completion, and retries
    with a different temperature when a caller-supplied validator rejects
    the result.
    """

    def __init__(self, client: OpenAI, model: str):
        self._client = client
        self._model = model

    def _analyze_completion(self, analyze_message: list[dict[str, str]]) -> str:
        """
        Request a free-text analysis and return it with surrounding
        whitespace stripped.

        Raises:
            LLMError: if the response has no choices, the first choice has
                missing or blank content, or the request itself fails.
        """
        try:
            completion = self._client.chat.completions.create(
                model=self._model,
                messages=analyze_message,
            )

            if not completion.choices:
                raise LLMError("No choices returned from LLM")

            # `content` can be None; normalise it so the explicit
            # "Empty analysis response" error below is raised instead of an
            # AttributeError from calling .strip() on None.
            analysis = (completion.choices[0].message.content or "").strip()

            if not analysis:
                raise LLMError("Empty analysis response")

            return analysis

        except (PromptError, LLMError):
            raise
        except Exception as e:
            # Keep the original exception attached as the cause
            raise LLMError(f"Analysis failed: {e}") from e

    def _parse_completion(
        self,
        main_message: list[dict[str, str]],
        output_model: Type[T],
        temperature: float,
        logprobs: bool,
        top_logprobs: int,
        priority: int | None,
    ) -> tuple[T, Any]:
        """
        Request a chat completion parsed into `output_model`.

        `top_logprobs` is only sent when `logprobs` is true, and `priority`
        is only sent (inside `extra_body`) when it is not None.

        Returns:
            A `(parsed, completion)` tuple: the parsed model instance and the
            raw completion object, which callers use to extract logprobs.

        Raises:
            LLMError: if the response has no choices, nothing was parsed, or
                the request itself fails.
        """
        try:
            request_kwargs: dict[str, Any] = {
                "model": self._model,
                "messages": main_message,
                "response_format": output_model,
                "temperature": temperature,
            }

            if logprobs:
                request_kwargs["logprobs"] = True
                request_kwargs["top_logprobs"] = top_logprobs

            if priority is not None:
                request_kwargs["extra_body"] = {"priority": priority}

            completion = self._client.beta.chat.completions.parse(**request_kwargs)

            if not completion.choices:
                raise LLMError("No choices returned from LLM")

            parsed = completion.choices[0].message.parsed

            # Identity check: only a missing parse is a failure, not a parsed
            # object that happens to evaluate as falsy.
            if parsed is None:
                raise LLMError("Failed to parse LLM response")

            return parsed, completion

        except LLMError:
            raise
        except Exception as e:
            raise LLMError(f"Completion failed: {e}") from e

    def _retry_until_valid(
        self,
        main_message: list[dict[str, str]],
        output_model: Type[T],
        temperature: float,
        logprobs: bool,
        top_logprobs: int,
        priority: int | None,
        validator: Callable[[Any], bool],
        max_validation_retries: int,
    ) -> tuple[T, Any]:
        """
        Re-request the completion until `validator` accepts `parsed.result`.

        Makes at most `max_validation_retries` attempts, asking
        `OperatorUtils.get_retry_temp` for a new temperature each time.

        Raises:
            ValidationError: if no attempt passes validation. When an attempt
                failed with an LLMError, the most recent one is attached as
                the cause instead of being silently discarded.
        """
        last_error: LLMError | None = None

        for _ in range(max_validation_retries):
            # Generate a new temperature to retry
            retry_temperature = OperatorUtils.get_retry_temp(temperature)

            try:
                parsed, completion = self._parse_completion(
                    main_message,
                    output_model,
                    retry_temperature,
                    logprobs,
                    top_logprobs,
                    priority=priority,
                )

                if validator(parsed.result):
                    return parsed, completion

            except LLMError as e:
                # A failed attempt does not abort the loop; remember it so the
                # final error can report why the retries did not succeed.
                last_error = e

        raise ValidationError("Validation failed after all retries") from last_error

    def run(
        self,
        # User parameters
        text: str,
        with_analysis: bool,
        output_lang: str | None,
        user_prompt: str | None,
        temperature: float,
        logprobs: bool,
        top_logprobs: int,
        validator: Callable[[Any], bool] | None,
        max_validation_retries: int | None,
        priority: int | None,
        # Internal parameters
        tool_name: str,
        output_model: Type[T],
        mode: str | None,
        **extra_kwargs,
    ) -> OperatorOutput:
        """
        Execute the LLM pipeline with the given input text.

        Args:
            text: Input text; stripped before it is handed to the prompt loader.
            with_analysis: When true, an analysis is requested first using the
                prompt's "analyze_template" and passed into the main prompt.
            output_lang: Forwarded to `OperatorUtils.build_main_prompt`.
            user_prompt: Forwarded to `OperatorUtils.build_main_prompt`.
            temperature: Temperature of the first attempt; retries use the
                value returned by `OperatorUtils.get_retry_temp`.
            logprobs: Whether to request logprobs and include them in the output.
            top_logprobs: Sent with the request only when `logprobs` is true.
            validator: Optional callable applied to `parsed.result`; a falsy
                return value triggers the retry loop.
            max_validation_retries: Number of retry attempts; must be a
                positive int whenever validation fails.
            priority: Sent as `extra_body={"priority": ...}` when not None.
            tool_name: Name of the prompt file without the ".yaml" suffix.
            output_model: Model class the completion is parsed into; its
                instances must expose a `result` attribute.
            mode: Forwarded to `PromptLoader.load`.
            **extra_kwargs: Forwarded to `PromptLoader.load`.

        Returns:
            An OperatorOutput holding the result, the analysis (None when not
            requested) and the extracted logprobs (None when not requested).

        Raises:
            PromptError, LLMError, ValidationError: propagated unchanged.
            TextToolsError: wraps any other exception raised in the pipeline,
                including the ValueError for an invalid `max_validation_retries`.
        """
        try:
            prompt_loader = PromptLoader()
            prompt_configs = prompt_loader.load(
                prompt_file=tool_name + ".yaml",
                text=text.strip(),
                mode=mode,
                **extra_kwargs,
            )

            analysis: str | None = None

            if with_analysis:
                analyze_message = OperatorUtils.build_message(
                    prompt_configs["analyze_template"]
                )
                analysis = self._analyze_completion(analyze_message)

            main_message = OperatorUtils.build_message(
                OperatorUtils.build_main_prompt(
                    prompt_configs["main_template"], analysis, output_lang, user_prompt
                )
            )

            parsed, completion = self._parse_completion(
                main_message,
                output_model,
                temperature,
                logprobs,
                top_logprobs,
                priority,
            )

            # Retry logic if validation fails
            if validator and not validator(parsed.result):
                if (
                    not isinstance(max_validation_retries, int)
                    or max_validation_retries < 1
                ):
                    raise ValueError("max_validation_retries should be a positive int")

                parsed, completion = self._retry_until_valid(
                    main_message,
                    output_model,
                    temperature,
                    logprobs,
                    top_logprobs,
                    priority,
                    validator,
                    max_validation_retries,
                )

            # `analysis` is only ever assigned when with_analysis is true, so
            # it can be passed through as-is.
            return OperatorOutput(
                result=parsed.result,
                analysis=analysis,
                logprobs=OperatorUtils.extract_logprobs(completion)
                if logprobs
                else None,
            )

        except (PromptError, LLMError, ValidationError):
            raise
        except Exception as e:
            raise TextToolsError(f"Unexpected error in operator: {e}") from e
texttools/models.py ADDED
@@ -0,0 +1,88 @@
1
from __future__ import annotations

from datetime import datetime
from typing import Any

from pydantic import BaseModel, Field
7
+
8
+
9
class ToolOutputMetadata(BaseModel):
    """Bookkeeping information attached to a tool's output."""

    # Name of the tool that produced the output
    tool_name: str
    # default_factory gives every instance its own creation timestamp; a plain
    # `datetime.now()` default is evaluated once, when the class is defined,
    # and would be shared by all instances.
    processed_at: datetime = Field(default_factory=datetime.now)
    # None when no execution time was recorded
    execution_time: float | None = None
13
+
14
+
15
class ToolOutput(BaseModel):
    """Result envelope of a tool: the result plus optional analysis, logprobs, errors and metadata."""

    # Tool-specific result; None by default
    result: Any = None
    # Free-text analysis, when one was produced
    analysis: str | None = None
    # Logprob entries, when they were requested
    logprobs: list[dict[str, Any]] | None = None
    # Error messages; empty by default
    errors: list[str] = []
    # Metadata about the call, when attached
    metadata: ToolOutputMetadata | None = None

    def __repr__(self) -> str:
        # Show the model as indented JSON wrapped in the class name
        payload = self.model_dump_json(indent=2)
        return "ToolOutput(" + payload + ")"
24
+
25
+
26
class Node:
    """A named category holding its description, depth level, parent and children."""

    def __init__(self, name: str, description: str, level: int, parent: Node | None):
        self.name, self.description = name, description
        self.level, self.parent = level, parent
        # Child nodes; every node starts with its own empty mapping
        self.children: dict[str, Node] = {}
33
+
34
+
35
class CategoryTree:
    """
    Tree of categories under a fixed "root" node, with a flat name-to-node
    index for lookups. Category names are unique across the whole tree.
    """

    def __init__(self):
        root = Node(name="root", description="root", level=0, parent=None)
        self._root = root
        self._all_nodes = {"root": root}

    def get_all_nodes(self) -> dict[str, Node]:
        """Return the internal name-to-node index (the dict itself, not a copy)."""
        return self._all_nodes

    def get_level_count(self) -> int:
        """Return the highest level among all nodes (0 when only the root exists)."""
        levels = [entry.level for entry in self._all_nodes.values()]
        return max(levels)

    def get_node(self, name: str) -> Node | None:
        """Return the node registered under `name`, or None when there is none."""
        if name in self._all_nodes:
            return self._all_nodes[name]
        return None

    def add_node(
        self,
        name: str,
        parent_name: str,
        description: str | None = None,
    ) -> None:
        """
        Create a node called `name` under the node called `parent_name`.

        The new node sits one level below its parent. A missing or empty
        description is replaced by "No description provided".

        Raises:
            ValueError: if `name` is already in the tree or `parent_name` is not.
        """
        existing = self.get_node(name)
        if existing:
            raise ValueError(f"Cannot add {name} category twice")

        parent = self.get_node(parent_name)
        if not parent:
            raise ValueError(f"Parent category '{parent_name}' not found")

        new_node = Node(
            name=name,
            description=description or "No description provided",
            level=parent.level + 1,
            parent=parent,
        )

        # Register the node with its parent and in the flat index
        parent.children[name] = new_node
        self._all_nodes[name] = new_node

    def remove_node(self, name: str) -> None:
        """
        Remove the node called `name` together with all of its descendants.

        Raises:
            ValueError: if `name` is "root" or is not in the tree.
        """
        if name == "root":
            raise ValueError("Cannot remove the root node")

        target = self.get_node(name)
        if not target:
            raise ValueError(f"Category: '{name}' not found")

        # Iterate over a snapshot: each recursive call deletes from target.children
        for child_name in tuple(target.children):
            self.remove_node(child_name)

        owner = target.parent
        if owner:
            del owner.children[name]

        del self._all_nodes[name]
@@ -1,77 +1,36 @@
1
- main_template:
2
-
3
- category_list: |
4
- You are an expert classification agent.
5
- You receive a list of categories.
6
-
7
- Your task:
8
- - Read all provided categories carefully.
9
- - Consider the user query, intent, and task explanation.
10
- - Select exactly one category name from the list that best matches the user’s intent.
11
- - Return only the category name, nothing else.
12
-
13
- Rules:
14
- - Never invent categories that are not in the list.
15
- - If multiple categories seem possible, choose the closest match based on the description and user intent.
16
- - If descriptions are missing or empty, rely on the category name.
17
- - If the correct answer cannot be determined with certainty, choose the most likely one.
18
-
19
- Output format:
20
- {{
21
- "reason": "Explanation of why the input belongs to the category"
22
- "result": "<category_name_only>"
23
- }}
24
-
25
- Available categories with their descriptions:
26
- {category_list}
27
-
28
- The text that has to be categorized:
29
- {input}
30
-
31
- category_tree: |
32
- You are an expert classification agent.
33
- You receive a list of categories at the current level of a hierarchical category tree.
34
-
35
- Your task:
36
- - Read all provided categories carefully.
37
- - Consider the user query, intent, and task explanation.
38
- - Select exactly one category name from the list that best matches the user’s intent.
39
- - Return only the category name, nothing else.
40
-
41
- Rules:
42
- - Never invent categories that are not in the list.
43
- - If multiple categories seem possible, choose the closest match based on the description and user intent.
44
- - If descriptions are missing or empty, rely on the category name.
45
- - If the correct answer cannot be determined with certainty, choose the most likely one.
46
-
47
- Output format:
48
- {{
49
- "reason": "Explanation of why the input belongs to the category"
50
- "result": "<category_name_only>"
51
- }}
52
-
53
- Available categories with their descriptions at this level:
54
- {category_list}
55
-
56
- Do not include category descriptions at all. Only write the raw category.
57
-
58
- The text that has to be categorized:
59
- {input}
60
-
61
- analyze_template:
62
-
63
- category_list: |
64
- We want to categorize the given text.
65
- To improve categorization, we need an analysis of the text.
66
- Analyze the given text and write its main idea and a short analysis of that.
67
- Analysis should be very short.
68
- Text:
69
- {input}
70
-
71
- category_tree: |
72
- We want to categorize the given text.
73
- To improve categorization, we need an analysis of the text.
74
- Analyze the given text and write its main idea and a short analysis of that.
75
- Analysis should be very short.
76
- Text:
77
- {input}
1
+ main_template: |
2
+ You are an expert classification agent.
3
+ You receive a list of categories.
4
+
5
+ Your task:
6
+ - Read all provided categories carefully.
7
+ - Consider the user query, intent, and task explanation.
8
+ - Select exactly one category name from the list that best matches the user’s intent.
9
+ - Return only the category name, nothing else.
10
+
11
+ Rules:
12
+ - Never invent categories that are not in the list.
13
+ - If multiple categories seem possible, choose the closest match based on the description and user intent.
14
+ - If descriptions are missing or empty, rely on the category name.
15
+ - If the correct answer cannot be determined with certainty, choose the most likely one.
16
+
17
+ Output format:
18
+ {{
19
+ "reason": "Explanation of why the input belongs to the category"
20
+ "result": "<category_name_only>"
21
+ }}
22
+
23
+ Available categories with their descriptions:
24
+ {category_list}
25
+
26
+ Here is the text:
27
+ {text}
28
+
29
+ analyze_template: |
30
+ We want to categorize the given text.
31
+ To improve categorization, we need an analysis of the text.
32
+ Analyze the given text and write its main idea and a short analysis of that.
33
+ Analysis should be very short.
34
+
35
+ Here is the text:
36
+ {text}
@@ -0,0 +1,24 @@
1
+ main_template: |
2
+ You are an expert in determining whether a statement can be concluded from the source text or not.
3
+ You must return a boolean value: True or False.
4
+ Return True if the statement can be concluded from the source, and False otherwise.
5
+
6
+ Respond only in JSON format (Output should be a boolean):
7
+ {{"result": True/False}}
8
+
9
+ The statement is:
10
+ {text}
11
+
12
+ The source text is:
13
+ {source_text}
14
+
15
+ analyze_template: |
16
+ You should analyze a statement and a source text and provide a brief,
17
+ summarized analysis that could help in determining whether the statement
18
+ can be concluded from the source or not.
19
+
20
+ The statement is:
21
+ {text}
22
+
23
+ The source text is:
24
+ {source_text}
@@ -1,7 +1,8 @@
1
1
  main_template: |
2
2
  You are a Named Entity Recognition (NER) extractor.
3
- Identify and extract all named entities (e.g., PER, ORG, LOC, DAT, etc.) from the given text.
3
+ Identify and extract {entities} from the given text.
4
4
  For each entity, provide its text and a clear type.
5
+
5
6
  Respond only in JSON format:
6
7
  {{
7
8
  "result": [
@@ -11,10 +12,13 @@ main_template: |
11
12
  }}
12
13
  ]
13
14
  }}
15
+
14
16
  Here is the text:
15
- {input}
17
+ {text}
16
18
 
17
19
  analyze_template: |
18
20
  Read the following text and identify any proper nouns, key concepts, or specific mentions that might represent named entities.
19
21
  Provide a brief, summarized analysis that could help in categorizing these entities.
20
- {input}
22
+
23
+ Here is the text:
24
+ {text}
@@ -3,16 +3,19 @@ main_template:
3
3
  auto: |
4
4
  You are an expert keyword extractor.
5
5
  Extract the most relevant keywords from the given text.
6
+
6
7
  Guidelines:
7
8
  - Keywords must represent the main concepts of the text.
8
9
  - If two words have overlapping meanings, choose only one.
9
10
  - Do not include generic or unrelated words.
10
11
  - Keywords must be single, self-contained words (no phrases).
11
12
  - Output between 3 and 7 keywords based on the input length.
12
- - Respond only in JSON format:
13
+
14
+ Respond only in JSON format:
13
15
  {{"result": ["keyword1", "keyword2", etc.]}}
16
+
14
17
  Here is the text:
15
- {input}
18
+ {text}
16
19
 
17
20
  threshold: |
18
21
  You are an expert keyword extractor specialized in fine-grained concept identification.
@@ -29,10 +32,12 @@ main_template:
29
32
  - Short texts (a few sentences): 3 keywords
30
33
  - Medium texts (1–4 paragraphs): 4–5 keywords
31
34
  - Long texts (more than 4 paragraphs): 6–7 keywords
32
- - Respond only in JSON format:
35
+
36
+ Respond only in JSON format:
33
37
  {{"result": ["keyword1", "keyword2", etc.]}}
38
+
34
39
  Here is the text:
35
- {input}
40
+ {text}
36
41
 
37
42
  count: |
38
43
  You are an expert keyword extractor with precise output requirements.
@@ -45,24 +50,31 @@ main_template:
45
50
  - If the text doesn't contain enough distinct keywords, include the most relevant ones even if some are less specific.
46
51
  - Keywords must be single words (no multi-word expressions).
47
52
  - Order keywords by relevance (most relevant first).
48
- - Respond only in JSON format:
53
+
54
+ Respond only in JSON format:
49
55
  {{"result": ["keyword1", "keyword2", "keyword3", ...]}}
50
56
 
51
57
  Here is the text:
52
- {input}
58
+ {text}
53
59
 
54
60
  analyze_template:
55
61
  auto: |
56
62
  Analyze the following text to identify its main topics, concepts, and important terms.
57
63
  Provide a concise summary of your findings that will help in extracting relevant keywords.
58
- {input}
64
+
65
+ Here is the text:
66
+ {text}
59
67
 
60
68
  threshold: |
61
69
  Analyze the following text to identify its main topics, concepts, and important terms.
62
70
  Provide a concise summary of your findings that will help in extracting relevant keywords.
63
- {input}
71
+
72
+ Here is the text:
73
+ {text}
64
74
 
65
75
  count: |
66
76
  Analyze the following text to identify its main topics, concepts, and important terms.
67
77
  Provide a concise summary of your findings that will help in extracting relevant keywords.
68
- {input}
78
+
79
+ Here is the text:
80
+ {text}
@@ -1,14 +1,18 @@
1
1
  main_template: |
2
2
  You are a question detector.
3
3
  Determine whether the given text contains any question or not.
4
+
4
5
  Respond only in JSON format (Output should be a boolean):
5
6
  {{"result": True/False}}
7
+
6
8
  Here is the text:
7
- {input}
9
+ {text}
8
10
 
9
11
  analyze_template: |
10
12
  We want to analyze this text snippet to see if it contains any question or request of some kind or not.
11
13
  Read the text, and reason about it being a request or not.
12
14
  Summarized, short answer.
13
- {input}
15
+
16
+ Here is the text:
17
+ {text}
14
18
 
@@ -4,15 +4,18 @@ main_template:
4
4
  You are a language expert.
5
5
  I will give you a list of questions that are semantically similar.
6
6
  Your task is to merge them into one unified question.
7
+
7
8
  Guidelines:
8
9
  - Preserves all the information and intent from the original questions.
9
10
  - Sounds natural, fluent, and concise.
10
11
  - Avoids redundancy or unnecessary repetition.
11
12
  - Does not omit any unique idea from the originals.
12
- - Respond only in JSON format:
13
+
14
+ Respond only in JSON format:
13
15
  {{"result": "string"}}
16
+
14
17
  Here are the questions:
15
- {input}
18
+ {text}
16
19
 
17
20
  reason: |
18
21
  You are an AI assistant helping to unify semantically similar questions.
@@ -20,10 +23,12 @@ main_template:
20
23
  Then, write one merged question that combines all their content clearly and naturally, without redundancy.
21
24
  Step 1: Extract key ideas.
22
25
  Step 2: Write the final merged question.
26
+
23
27
  Respond only in JSON format:
24
28
  {{"result": "string"}}
29
+
25
30
  Here are the questions:
26
- {input}
31
+ {text}
27
32
 
28
33
  analyze_template:
29
34
 
@@ -33,14 +38,16 @@ analyze_template:
33
38
  and the specific information they are seeking.
34
39
  Provide a brief, summarized understanding of the questions' meaning that
35
40
  will help in merging and rephrasing it accurately without changing its intent.
41
+
36
42
  Here is the question:
37
- {input}
43
+ {text}
38
44
 
39
45
  reason: |
40
46
  Analyze the following questions to identify their exact wording, phrasing,
41
47
  and the literal meaning it conveys.
42
48
  Provide a brief, summarized analysis of their linguistic structure and current meaning,
43
49
  which will then be used to create a new question containing all of their contents.
50
+
44
51
  Here is the question:
45
- {input}
52
+ {text}
46
53
 
@@ -0,0 +1,24 @@
1
+ main_template: |
2
+ You are an expert data analyst specializing in Information Extraction.
3
+ Your task is to extract a list of "Atomic Propositions" from the provided text.
4
+
5
+ Definition of Atomic Proposition:
6
+ A single, self-contained statement of fact that is concise and verifiable.
7
+
8
+ Strict Guidelines:
9
+ 1. Remove Meta-Data: STRICTLY EXCLUDE all citations, references, URLs, source attributions (e.g., "Source: makarem.ir"), and conversational fillers (e.g., "Based on the documents...", "In conclusion...").
10
+ 2. Resolve Context: Replace pronouns ("it", "this", "they") with the specific nouns they refer to. Each proposition must make sense in isolation.
11
+ 3. Preserve Logic: Keep conditions attached to their facts. Do not split a rule from its condition (e.g., "If X, then Y" should be one proposition).
12
+ 4. No Redundancy: Do not extract summary statements that merely repeat facts already listed.
13
+
14
+ Extract the atomic propositions from the following text:
15
+ {text}
16
+
17
+ analyze_template: |
18
+ We want to analyze this text snippet and think about where we can split sentence to atomic meaningful propositions.
19
+ An atomic proposition is a single, self-contained fact that is concise,
20
+ verifiable, and does not rely on external context.
21
+ You just have to think around the possible propositions in the text and how a proposition can be made.
22
+
23
+ Here is the text:
24
+ {text}
@@ -18,7 +18,7 @@ main_template:
18
18
  {{"result": "str"}}
19
19
 
20
20
  Anchor Text:
21
- "{input}"
21
+ "{text}"
22
22
 
23
23
  negative: |
24
24
  You are an AI assistant designed to generate high-quality training data for semantic text embedding models.
@@ -35,7 +35,7 @@ main_template:
35
35
  {{"result": "str"}}
36
36
 
37
37
  Anchor Text:
38
- "{input}"
38
+ "{text}"
39
39
 
40
40
  hard_negative: |
41
41
  You are an AI assistant designed to generate high-quality training data for semantic text embedding models.
@@ -52,12 +52,11 @@ main_template:
52
52
  - Make it Challenging: The difference should be subtle enough that it requires a deep understanding of the text to identify, not just a simple keyword mismatch.
53
53
  - Maintain Similar Length: The generated sentence should be of roughly the same length and level of detail as the Anchor.
54
54
 
55
-
56
55
  Respond only in JSON format:
57
56
  {{"result": "str"}}
58
57
 
59
58
  Anchor Text:
60
- "{input}"
59
+ "{text}"
61
60
 
62
61
 
63
62
  analyze_template:
@@ -73,8 +72,8 @@ analyze_template:
73
72
 
74
73
  Your analysis should capture the ESSENTIAL MEANING that must be preserved in any paraphrase.
75
74
 
76
- Text:
77
- {input}
75
+ Here is the text:
76
+ {text}
78
77
 
79
78
  negative: |
80
79
  Analyze the following text to identify its SPECIFIC TOPIC and DOMAIN for creating a high-quality NEGATIVE sample.
@@ -87,8 +86,8 @@ analyze_template:
87
86
 
88
87
  The goal is to find topics that are in the same domain but semantically unrelated to this specific text.
89
88
 
90
- Text:
91
- {input}
89
+ Here is the text:
90
+ {text}
92
91
 
93
92
  hard_negative: |
94
93
  Analyze this text to identify EXACTLY ONE ELEMENT that can be changed to create a hard-negative sample.
@@ -106,6 +105,6 @@ analyze_template:
106
105
  - Sentence structure
107
106
  - 80-90% of the vocabulary
108
107
 
109
- Text:
110
- {input}
108
+ Here is the text:
109
+ {text}
111
110