hamtaa-texttools 1.1.22__tar.gz → 1.1.23__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (41) hide show
  1. {hamtaa_texttools-1.1.22/hamtaa_texttools.egg-info → hamtaa_texttools-1.1.23}/PKG-INFO +1 -1
  2. {hamtaa_texttools-1.1.22 → hamtaa_texttools-1.1.23/hamtaa_texttools.egg-info}/PKG-INFO +1 -1
  3. {hamtaa_texttools-1.1.22 → hamtaa_texttools-1.1.23}/pyproject.toml +1 -1
  4. {hamtaa_texttools-1.1.22 → hamtaa_texttools-1.1.23}/tests/test_all_async_tools.py +2 -5
  5. {hamtaa_texttools-1.1.22 → hamtaa_texttools-1.1.23}/tests/test_all_tools.py +3 -10
  6. {hamtaa_texttools-1.1.22 → hamtaa_texttools-1.1.23}/tests/test_output_validation.py +2 -6
  7. {hamtaa_texttools-1.1.22 → hamtaa_texttools-1.1.23}/texttools/batch/batch_config.py +2 -1
  8. {hamtaa_texttools-1.1.22 → hamtaa_texttools-1.1.23}/texttools/batch/batch_manager.py +6 -6
  9. {hamtaa_texttools-1.1.22 → hamtaa_texttools-1.1.23}/texttools/batch/batch_runner.py +6 -6
  10. {hamtaa_texttools-1.1.22 → hamtaa_texttools-1.1.23}/texttools/internals/async_operator.py +9 -12
  11. {hamtaa_texttools-1.1.22 → hamtaa_texttools-1.1.23}/texttools/internals/exceptions.py +0 -6
  12. {hamtaa_texttools-1.1.22 → hamtaa_texttools-1.1.23}/texttools/internals/operator_utils.py +0 -3
  13. {hamtaa_texttools-1.1.22 → hamtaa_texttools-1.1.23}/texttools/internals/prompt_loader.py +0 -5
  14. {hamtaa_texttools-1.1.22 → hamtaa_texttools-1.1.23}/texttools/internals/sync_operator.py +9 -12
  15. {hamtaa_texttools-1.1.22 → hamtaa_texttools-1.1.23}/texttools/prompts/categorize.yaml +3 -2
  16. {hamtaa_texttools-1.1.22 → hamtaa_texttools-1.1.23}/texttools/prompts/check_fact.yaml +5 -0
  17. {hamtaa_texttools-1.1.22 → hamtaa_texttools-1.1.23}/texttools/prompts/extract_entities.yaml +4 -0
  18. {hamtaa_texttools-1.1.22 → hamtaa_texttools-1.1.23}/texttools/prompts/extract_keywords.yaml +15 -3
  19. {hamtaa_texttools-1.1.22 → hamtaa_texttools-1.1.23}/texttools/prompts/is_question.yaml +4 -0
  20. {hamtaa_texttools-1.1.22 → hamtaa_texttools-1.1.23}/texttools/prompts/merge_questions.yaml +8 -1
  21. {hamtaa_texttools-1.1.22 → hamtaa_texttools-1.1.23}/texttools/prompts/propositionize.yaml +2 -0
  22. {hamtaa_texttools-1.1.22 → hamtaa_texttools-1.1.23}/texttools/prompts/rewrite.yaml +3 -4
  23. {hamtaa_texttools-1.1.22 → hamtaa_texttools-1.1.23}/texttools/prompts/subject_to_question.yaml +5 -1
  24. {hamtaa_texttools-1.1.22 → hamtaa_texttools-1.1.23}/texttools/prompts/summarize.yaml +4 -0
  25. {hamtaa_texttools-1.1.22 → hamtaa_texttools-1.1.23}/texttools/prompts/text_to_question.yaml +4 -0
  26. {hamtaa_texttools-1.1.22 → hamtaa_texttools-1.1.23}/texttools/prompts/translate.yaml +5 -0
  27. {hamtaa_texttools-1.1.22 → hamtaa_texttools-1.1.23}/texttools/tools/async_tools.py +87 -101
  28. {hamtaa_texttools-1.1.22 → hamtaa_texttools-1.1.23}/texttools/tools/sync_tools.py +87 -102
  29. {hamtaa_texttools-1.1.22 → hamtaa_texttools-1.1.23}/LICENSE +0 -0
  30. {hamtaa_texttools-1.1.22 → hamtaa_texttools-1.1.23}/MANIFEST.in +0 -0
  31. {hamtaa_texttools-1.1.22 → hamtaa_texttools-1.1.23}/README.md +0 -0
  32. {hamtaa_texttools-1.1.22 → hamtaa_texttools-1.1.23}/hamtaa_texttools.egg-info/SOURCES.txt +0 -0
  33. {hamtaa_texttools-1.1.22 → hamtaa_texttools-1.1.23}/hamtaa_texttools.egg-info/dependency_links.txt +0 -0
  34. {hamtaa_texttools-1.1.22 → hamtaa_texttools-1.1.23}/hamtaa_texttools.egg-info/requires.txt +0 -0
  35. {hamtaa_texttools-1.1.22 → hamtaa_texttools-1.1.23}/hamtaa_texttools.egg-info/top_level.txt +0 -0
  36. {hamtaa_texttools-1.1.22 → hamtaa_texttools-1.1.23}/setup.cfg +0 -0
  37. {hamtaa_texttools-1.1.22 → hamtaa_texttools-1.1.23}/texttools/__init__.py +0 -0
  38. {hamtaa_texttools-1.1.22 → hamtaa_texttools-1.1.23}/texttools/internals/models.py +0 -0
  39. {hamtaa_texttools-1.1.22 → hamtaa_texttools-1.1.23}/texttools/internals/text_to_chunks.py +0 -0
  40. {hamtaa_texttools-1.1.22 → hamtaa_texttools-1.1.23}/texttools/prompts/README.md +0 -0
  41. {hamtaa_texttools-1.1.22 → hamtaa_texttools-1.1.23}/texttools/prompts/run_custom.yaml +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: hamtaa-texttools
3
- Version: 1.1.22
3
+ Version: 1.1.23
4
4
  Summary: A high-level NLP toolkit built on top of modern LLMs.
5
5
  Author-email: Tohidi <the.mohammad.tohidi@gmail.com>, Montazer <montazerh82@gmail.com>, Givechi <mohamad.m.givechi@gmail.com>, MoosaviNejad <erfanmoosavi84@gmail.com>, Zareshahi <a.zareshahi1377@gmail.com>
6
6
  License: MIT License
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: hamtaa-texttools
3
- Version: 1.1.22
3
+ Version: 1.1.23
4
4
  Summary: A high-level NLP toolkit built on top of modern LLMs.
5
5
  Author-email: Tohidi <the.mohammad.tohidi@gmail.com>, Montazer <montazerh82@gmail.com>, Givechi <mohamad.m.givechi@gmail.com>, MoosaviNejad <erfanmoosavi84@gmail.com>, Zareshahi <a.zareshahi1377@gmail.com>
6
6
  License: MIT License
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "hamtaa-texttools"
7
- version = "1.1.22"
7
+ version = "1.1.23"
8
8
  authors = [
9
9
  { name = "Tohidi", email = "the.mohammad.tohidi@gmail.com" },
10
10
  { name = "Montazer", email = "montazerh82@gmail.com" },
@@ -6,16 +6,13 @@ from openai import AsyncOpenAI
6
6
 
7
7
  from texttools import AsyncTheTool
8
8
 
9
- # Load environment variables from .env
10
9
  load_dotenv()
11
- API_KEY = os.getenv("OPENAI_API_KEY")
10
+ OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
12
11
  BASE_URL = os.getenv("BASE_URL")
13
12
  MODEL = os.getenv("MODEL")
14
13
 
15
- # Create AsyncOpenAI client
16
- client = AsyncOpenAI(base_url=BASE_URL, api_key=API_KEY)
14
+ client = AsyncOpenAI(base_url=BASE_URL, api_key=OPENAI_API_KEY)
17
15
 
18
- # Create an instance of TheTool
19
16
  t = AsyncTheTool(client=client, model=MODEL)
20
17
 
21
18
 
@@ -6,24 +6,18 @@ from pydantic import BaseModel
6
6
 
7
7
  from texttools import TheTool, CategoryTree
8
8
 
9
- # Load environment variables from .env
10
9
  load_dotenv()
11
- API_KEY = os.getenv("OPENAI_API_KEY")
10
+ OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
12
11
  BASE_URL = os.getenv("BASE_URL")
13
12
  MODEL = os.getenv("MODEL")
14
13
 
15
- # Create OpenAI client
16
- client = OpenAI(base_url=BASE_URL, api_key=API_KEY)
14
+ client = OpenAI(base_url=BASE_URL, api_key=OPENAI_API_KEY)
17
15
 
18
- # Create an instance of TheTool
19
16
  t = TheTool(client=client, model=MODEL)
20
17
 
21
18
  # Categorizer: list mode
22
19
  category = t.categorize(
23
- "سلام حالت چطوره؟",
24
- categories=["هیچکدام", "دینی", "فلسفه"],
25
- logprobs=True,
26
- top_logprobs=3,
20
+ "سلام حالت چطوره؟", categories=["هیچکدام", "دینی", "فلسفه"], priority=3
27
21
  )
28
22
  print(repr(category))
29
23
 
@@ -56,7 +50,6 @@ entities = t.extract_entities(
56
50
  "Ali will be dead by the car crash",
57
51
  entities=["EVENT"],
58
52
  with_analysis=True,
59
- logprobs=True,
60
53
  )
61
54
  print(repr(entities))
62
55
 
@@ -6,16 +6,13 @@ from openai import OpenAI
6
6
 
7
7
  from texttools import TheTool
8
8
 
9
- # Load environment variables from .env
10
9
  load_dotenv()
11
- API_KEY = os.getenv("OPENAI_API_KEY")
10
+ OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
12
11
  BASE_URL = os.getenv("BASE_URL")
13
12
  MODEL = os.getenv("MODEL")
14
13
 
15
- # Create OpenAI client
16
- client = OpenAI(base_url=BASE_URL, api_key=API_KEY)
14
+ client = OpenAI(base_url=BASE_URL, api_key=OPENAI_API_KEY)
17
15
 
18
- # Create an instance of TheTool
19
16
  t = TheTool(client=client, model=MODEL)
20
17
 
21
18
 
@@ -24,7 +21,6 @@ def validate(result: Any) -> bool:
24
21
  return "چیست؟" not in result
25
22
 
26
23
 
27
- # Question from Text Generator
28
24
  question = t.text_to_question(
29
25
  "زندگی",
30
26
  output_lang="Persian",
@@ -1,3 +1,4 @@
1
+ from typing import Any
1
2
  from dataclasses import dataclass
2
3
  from collections.abc import Callable
3
4
 
@@ -10,7 +11,7 @@ def export_data(data) -> list[dict[str, str]]:
10
11
  return data
11
12
 
12
13
 
13
- def import_data(data) -> object:
14
+ def import_data(data) -> Any:
14
15
  """
15
16
  Takes the output and adds and aggregates it to the original structure.
16
17
  """
@@ -1,7 +1,7 @@
1
1
  import json
2
2
  import uuid
3
3
  from pathlib import Path
4
- from typing import Type, TypeVar
4
+ from typing import Type, TypeVar, Any
5
5
  import logging
6
6
 
7
7
  from pydantic import BaseModel
@@ -31,7 +31,7 @@ class BatchManager:
31
31
  prompt_template: str,
32
32
  state_dir: Path = Path(".batch_jobs"),
33
33
  custom_json_schema_obj_str: dict | None = None,
34
- **client_kwargs: object,
34
+ **client_kwargs: Any,
35
35
  ):
36
36
  self._client = client
37
37
  self._model = model
@@ -51,7 +51,7 @@ class BatchManager:
51
51
  def _state_file(self, job_name: str) -> Path:
52
52
  return self._state_dir / f"{job_name}.json"
53
53
 
54
- def _load_state(self, job_name: str) -> list[dict[str, object]]:
54
+ def _load_state(self, job_name: str) -> list[dict[str, Any]]:
55
55
  """
56
56
  Loads the state (job information) from the state file for the given job name.
57
57
  Returns an empty list if the state file does not exist.
@@ -62,7 +62,7 @@ class BatchManager:
62
62
  return json.load(f)
63
63
  return []
64
64
 
65
- def _save_state(self, job_name: str, jobs: list[dict[str, object]]) -> None:
65
+ def _save_state(self, job_name: str, jobs: list[dict[str, Any]]) -> None:
66
66
  """
67
67
  Saves the job state to the state file for the given job name.
68
68
  """
@@ -77,11 +77,11 @@ class BatchManager:
77
77
  if path.exists():
78
78
  path.unlink()
79
79
 
80
- def _build_task(self, text: str, idx: str) -> dict[str, object]:
80
+ def _build_task(self, text: str, idx: str) -> dict[str, Any]:
81
81
  """
82
82
  Builds a single task dictionary for the batch job, including the prompt, model, and response format configuration.
83
83
  """
84
- response_format_config: dict[str, object]
84
+ response_format_config: dict[str, Any]
85
85
 
86
86
  if self._custom_json_schema_obj_str:
87
87
  response_format_config = {
@@ -2,7 +2,7 @@ import json
2
2
  import os
3
3
  import time
4
4
  from pathlib import Path
5
- from typing import Type, TypeVar
5
+ from typing import Type, TypeVar, Any
6
6
  import logging
7
7
 
8
8
  from dotenv import load_dotenv
@@ -12,7 +12,7 @@ from pydantic import BaseModel
12
12
  from texttools.batch.batch_manager import BatchManager
13
13
  from texttools.batch.batch_config import BatchConfig
14
14
  from texttools.internals.models import Str
15
- from texttools.internals.exceptions import TextToolsError, ConfigurationError
15
+ from texttools.internals.exceptions import TextToolsError
16
16
 
17
17
  # Base Model type for output models
18
18
  T = TypeVar("T", bound=BaseModel)
@@ -38,7 +38,7 @@ class BatchRunner:
38
38
  self._output_model = output_model
39
39
  self._manager = self._init_manager()
40
40
  self._data = self._load_data()
41
- self._parts: list[list[dict[str, object]]] = []
41
+ self._parts: list[list[dict[str, Any]]] = []
42
42
  # Map part index to job name
43
43
  self._part_idx_to_job_name: dict[int, str] = {}
44
44
  # Track retry attempts per part
@@ -47,7 +47,7 @@ class BatchRunner:
47
47
  Path(self._config.BASE_OUTPUT_DIR).mkdir(parents=True, exist_ok=True)
48
48
 
49
49
  except Exception as e:
50
- raise ConfigurationError(f"Batch runner initialization failed: {e}")
50
+ raise TextToolsError(f"Batch runner initialization failed: {e}")
51
51
 
52
52
  def _init_manager(self) -> BatchManager:
53
53
  load_dotenv()
@@ -130,8 +130,8 @@ class BatchRunner:
130
130
 
131
131
  def _save_results(
132
132
  self,
133
- output_data: list[dict[str, object]] | dict[str, object],
134
- log: list[object],
133
+ output_data: list[dict[str, Any]] | dict[str, Any],
134
+ log: list[Any],
135
135
  part_idx: int,
136
136
  ):
137
137
  part_suffix = f"_part_{part_idx + 1}" if len(self._parts) > 1 else ""
@@ -1,4 +1,4 @@
1
- from typing import TypeVar, Type
1
+ from typing import TypeVar, Type, Any
2
2
  from collections.abc import Callable
3
3
 
4
4
  from openai import AsyncOpenAI
@@ -56,11 +56,11 @@ class AsyncOperator:
56
56
  temperature: float,
57
57
  logprobs: bool,
58
58
  top_logprobs: int,
59
- priority: int,
60
- ) -> tuple[T, object]:
59
+ priority: int | None,
60
+ ) -> tuple[T, Any]:
61
61
  """
62
62
  Parses a chat completion using OpenAI's structured output format.
63
- Returns both the parsed object and the raw completion for logprobs.
63
+ Returns both the parsed object and the raw completion for logprobs.
64
64
  """
65
65
  try:
66
66
  request_kwargs = {
@@ -74,7 +74,7 @@ class AsyncOperator:
74
74
  request_kwargs["logprobs"] = True
75
75
  request_kwargs["top_logprobs"] = top_logprobs
76
76
 
77
- if priority:
77
+ if priority is not None:
78
78
  request_kwargs["extra_body"] = {"priority": priority}
79
79
 
80
80
  completion = await self._client.beta.chat.completions.parse(
@@ -106,11 +106,11 @@ class AsyncOperator:
106
106
  temperature: float,
107
107
  logprobs: bool,
108
108
  top_logprobs: int,
109
- validator: Callable[[object], bool] | None,
109
+ validator: Callable[[Any], bool] | None,
110
110
  max_validation_retries: int | None,
111
- priority: int,
111
+ priority: int | None,
112
112
  # Internal parameters
113
- prompt_file: str,
113
+ tool_name: str,
114
114
  output_model: Type[T],
115
115
  mode: str | None,
116
116
  **extra_kwargs,
@@ -119,12 +119,9 @@ class AsyncOperator:
119
119
  Execute the LLM pipeline with the given input text.
120
120
  """
121
121
  try:
122
- if logprobs and (not isinstance(top_logprobs, int) or top_logprobs < 2):
123
- raise ValueError("top_logprobs should be an int greater than 1")
124
-
125
122
  prompt_loader = PromptLoader()
126
123
  prompt_configs = prompt_loader.load(
127
- prompt_file=prompt_file,
124
+ prompt_file=tool_name + ".yaml",
128
125
  text=text.strip(),
129
126
  mode=mode,
130
127
  **extra_kwargs,
@@ -20,9 +20,3 @@ class ValidationError(TextToolsError):
20
20
  """Errors from output validation."""
21
21
 
22
22
  pass
23
-
24
-
25
- class ConfigurationError(TextToolsError):
26
- """Errors from misconfiguration."""
27
-
28
- pass
@@ -67,9 +67,6 @@ class OperatorUtils:
67
67
 
68
68
  @staticmethod
69
69
  def get_retry_temp(base_temp: float) -> float:
70
- """
71
- Calculate temperature for retry attempts.
72
- """
73
70
  delta_temp = random.choice([-1, 1]) * random.uniform(0.1, 0.9)
74
71
  new_temp = base_temp + delta_temp
75
72
 
@@ -8,11 +8,6 @@ from texttools.internals.exceptions import PromptError
8
8
  class PromptLoader:
9
9
  """
10
10
  Utility for loading and formatting YAML prompt templates.
11
-
12
- Responsibilities:
13
- - Load and parse YAML prompt definitions.
14
- - Select the right template (by mode, if applicable).
15
- - Inject variables (`{text}`, plus any extra kwargs) into the templates.
16
11
  """
17
12
 
18
13
  MAIN_TEMPLATE = "main_template"
@@ -1,4 +1,4 @@
1
- from typing import TypeVar, Type
1
+ from typing import TypeVar, Type, Any
2
2
  from collections.abc import Callable
3
3
 
4
4
  from openai import OpenAI
@@ -56,11 +56,11 @@ class Operator:
56
56
  temperature: float,
57
57
  logprobs: bool,
58
58
  top_logprobs: int,
59
- priority: int,
60
- ) -> tuple[T, object]:
59
+ priority: int | None,
60
+ ) -> tuple[T, Any]:
61
61
  """
62
62
  Parses a chat completion using OpenAI's structured output format.
63
- Returns both the parsed object and the raw completion for logprobs.
63
+ Returns both the parsed object and the raw completion for logprobs.
64
64
  """
65
65
  try:
66
66
  request_kwargs = {
@@ -74,7 +74,7 @@ class Operator:
74
74
  request_kwargs["logprobs"] = True
75
75
  request_kwargs["top_logprobs"] = top_logprobs
76
76
 
77
- if priority:
77
+ if priority is not None:
78
78
  request_kwargs["extra_body"] = {"priority": priority}
79
79
 
80
80
  completion = self._client.beta.chat.completions.parse(**request_kwargs)
@@ -104,11 +104,11 @@ class Operator:
104
104
  temperature: float,
105
105
  logprobs: bool,
106
106
  top_logprobs: int,
107
- validator: Callable[[object], bool] | None,
107
+ validator: Callable[[Any], bool] | None,
108
108
  max_validation_retries: int | None,
109
- priority: int,
109
+ priority: int | None,
110
110
  # Internal parameters
111
- prompt_file: str,
111
+ tool_name: str,
112
112
  output_model: Type[T],
113
113
  mode: str | None,
114
114
  **extra_kwargs,
@@ -117,12 +117,9 @@ class Operator:
117
117
  Execute the LLM pipeline with the given input text.
118
118
  """
119
119
  try:
120
- if logprobs and (not isinstance(top_logprobs, int) or top_logprobs < 2):
121
- raise ValueError("top_logprobs should be an int greater than 1")
122
-
123
120
  prompt_loader = PromptLoader()
124
121
  prompt_configs = prompt_loader.load(
125
- prompt_file=prompt_file,
122
+ prompt_file=tool_name + ".yaml",
126
123
  text=text.strip(),
127
124
  mode=mode,
128
125
  **extra_kwargs,
@@ -23,7 +23,7 @@ main_template: |
23
23
  Available categories with their descriptions:
24
24
  {category_list}
25
25
 
26
- The text that has to be categorized:
26
+ Here is the text:
27
27
  {text}
28
28
 
29
29
  analyze_template: |
@@ -31,5 +31,6 @@ analyze_template: |
31
31
  To improve categorization, we need an analysis of the text.
32
32
  Analyze the given text and write its main idea and a short analysis of that.
33
33
  Analysis should be very short.
34
- Text:
34
+
35
+ Here is the text:
35
36
  {text}
@@ -2,10 +2,13 @@ main_template: |
2
2
  You are an expert in determining whether a statement can be concluded from the source text or not.
3
3
  You must return a boolean value: True or False.
4
4
  Return True if the statement can be concluded from the source, and False otherwise.
5
+
5
6
  Respond only in JSON format (Output should be a boolean):
6
7
  {{"result": True/False}}
8
+
7
9
  The statement is:
8
10
  {text}
11
+
9
12
  The source text is:
10
13
  {source_text}
11
14
 
@@ -13,7 +16,9 @@ analyze_template: |
13
16
  You should analyze a statement and a source text and provide a brief,
14
17
  summarized analysis that could help in determining whether the statement can
15
18
  be concluded from the source or not.
19
+
16
20
  The statement is:
17
21
  {text}
22
+
18
23
  The source text is:
19
24
  {source_text}
@@ -2,6 +2,7 @@ main_template: |
2
2
  You are a Named Entity Recognition (NER) extractor.
3
3
  Identify and extract {entities} from the given text.
4
4
  For each entity, provide its text and a clear type.
5
+
5
6
  Respond only in JSON format:
6
7
  {{
7
8
  "result": [
@@ -11,10 +12,13 @@ main_template: |
11
12
  }}
12
13
  ]
13
14
  }}
15
+
14
16
  Here is the text:
15
17
  {text}
16
18
 
17
19
  analyze_template: |
18
20
  Read the following text and identify any proper nouns, key concepts, or specific mentions that might represent named entities.
19
21
  Provide a brief, summarized analysis that could help in categorizing these entities.
22
+
23
+ Here is the text:
20
24
  {text}
@@ -3,14 +3,17 @@ main_template:
3
3
  auto: |
4
4
  You are an expert keyword extractor.
5
5
  Extract the most relevant keywords from the given text.
6
+
6
7
  Guidelines:
7
8
  - Keywords must represent the main concepts of the text.
8
9
  - If two words have overlapping meanings, choose only one.
9
10
  - Do not include generic or unrelated words.
10
11
  - Keywords must be single, self-contained words (no phrases).
11
12
  - Output between 3 and 7 keywords based on the input length.
12
- - Respond only in JSON format:
13
+
14
+ Respond only in JSON format:
13
15
  {{"result": ["keyword1", "keyword2", etc.]}}
16
+
14
17
  Here is the text:
15
18
  {text}
16
19
 
@@ -29,8 +32,10 @@ main_template:
29
32
  - Short texts (a few sentences): 3 keywords
30
33
  - Medium texts (1–4 paragraphs): 4–5 keywords
31
34
  - Long texts (more than 4 paragraphs): 6–7 keywords
32
- - Respond only in JSON format:
35
+
36
+ Respond only in JSON format:
33
37
  {{"result": ["keyword1", "keyword2", etc.]}}
38
+
34
39
  Here is the text:
35
40
  {text}
36
41
 
@@ -45,7 +50,8 @@ main_template:
45
50
  - If the text doesn't contain enough distinct keywords, include the most relevant ones even if some are less specific.
46
51
  - Keywords must be single words (no multi-word expressions).
47
52
  - Order keywords by relevance (most relevant first).
48
- - Respond only in JSON format:
53
+
54
+ Respond only in JSON format:
49
55
  {{"result": ["keyword1", "keyword2", "keyword3", ...]}}
50
56
 
51
57
  Here is the text:
@@ -55,14 +61,20 @@ analyze_template:
55
61
  auto: |
56
62
  Analyze the following text to identify its main topics, concepts, and important terms.
57
63
  Provide a concise summary of your findings that will help in extracting relevant keywords.
64
+
65
+ Here is the text:
58
66
  {text}
59
67
 
60
68
  threshold: |
61
69
  Analyze the following text to identify its main topics, concepts, and important terms.
62
70
  Provide a concise summary of your findings that will help in extracting relevant keywords.
71
+
72
+ Here is the text:
63
73
  {text}
64
74
 
65
75
  count: |
66
76
  Analyze the following text to identify its main topics, concepts, and important terms.
67
77
  Provide a concise summary of your findings that will help in extracting relevant keywords.
78
+
79
+ Here is the text:
68
80
  {text}
@@ -1,8 +1,10 @@
1
1
  main_template: |
2
2
  You are a question detector.
3
3
  Determine whether the given text contains any question or not.
4
+
4
5
  Respond only in JSON format (Output should be a boolean):
5
6
  {{"result": True/False}}
7
+
6
8
  Here is the text:
7
9
  {text}
8
10
 
@@ -10,5 +12,7 @@ analyze_template: |
10
12
  We want to analyze this text snippet to see if it contains any question or request of some kind or not.
11
13
  Read the text, and reason about it being a request or not.
12
14
  Summarized, short answer.
15
+
16
+ Here is the text:
13
17
  {text}
14
18
 
@@ -4,13 +4,16 @@ main_template:
4
4
  You are a language expert.
5
5
  I will give you a list of questions that are semantically similar.
6
6
  Your task is to merge them into one unified question.
7
+
7
8
  Guidelines:
8
9
  - Preserves all the information and intent from the original questions.
9
10
  - Sounds natural, fluent, and concise.
10
11
  - Avoids redundancy or unnecessary repetition.
11
12
  - Does not omit any unique idea from the originals.
12
- - Respond only in JSON format:
13
+
14
+ Respond only in JSON format:
13
15
  {{"result": "string"}}
16
+
14
17
  Here are the questions:
15
18
  {text}
16
19
 
@@ -20,8 +23,10 @@ main_template:
20
23
  Then, write one merged question that combines all their content clearly and naturally, without redundancy.
21
24
  Step 1: Extract key ideas.
22
25
  Step 2: Write the final merged question.
26
+
23
27
  Respond only in JSON format:
24
28
  {{"result": "string"}}
29
+
25
30
  Here are the questions:
26
31
  {text}
27
32
 
@@ -33,6 +38,7 @@ analyze_template:
33
38
  and the specific information they are seeking.
34
39
  Provide a brief, summarized understanding of the questions' meaning that
35
40
  will help in merging and rephrasing it accurately without changing its intent.
41
+
36
42
  Here is the question:
37
43
  {text}
38
44
 
@@ -41,6 +47,7 @@ analyze_template:
41
47
  and the literal meaning it conveys.
42
48
  Provide a brief, summarized analysis of their linguistic structure and current meaning,
43
49
  which will then be used to create a new question containing all of their contents.
50
+
44
51
  Here is the question:
45
52
  {text}
46
53
 
@@ -19,4 +19,6 @@ analyze_template: |
19
19
  An atomic proposition is a single, self-contained fact that is concise,
20
20
  verifiable, and does not rely on external context.
21
21
  You just have to think around the possible propositions in the text and how a proposition can be made.
22
+
23
+ Here is the text:
22
24
  {text}
@@ -52,7 +52,6 @@ main_template:
52
52
  - Make it Challenging: The difference should be subtle enough that it requires a deep understanding of the text to identify, not just a simple keyword mismatch.
53
53
  - Maintain Similar Length: The generated sentence should be of roughly the same length and level of detail as the Anchor.
54
54
 
55
-
56
55
  Respond only in JSON format:
57
56
  {{"result": "str"}}
58
57
 
@@ -73,7 +72,7 @@ analyze_template:
73
72
 
74
73
  Your analysis should capture the ESSENTIAL MEANING that must be preserved in any paraphrase.
75
74
 
76
- Text:
75
+ Here is the text:
77
76
  {text}
78
77
 
79
78
  negative: |
@@ -87,7 +86,7 @@ analyze_template:
87
86
 
88
87
  The goal is to find topics that are in the same domain but semantically unrelated to this specific text.
89
88
 
90
- Text:
89
+ Here is the text:
91
90
  {text}
92
91
 
93
92
  hard_negative: |
@@ -106,6 +105,6 @@ analyze_template:
106
105
  - Sentence structure
107
106
  - 80-90% of the vocabulary
108
107
 
109
- Text:
108
+ Here is the text:
110
109
  {text}
111
110
 
@@ -3,12 +3,15 @@ main_template: |
3
3
  Given the following subject, generate {number_of_questions} appropriate questions that this subject would directly respond to.
4
4
  The generated subject should be independently meaningful,
5
5
  and it must not mention any verbs like, this, that, he or she and etc. in the question.
6
+
6
7
  There is a `reason` key, fill that up with a summarized version of your thoughts.
7
8
  The `reason` must be less than 20 words.
8
9
  Don't forget to fill the reason.
10
+
9
11
  Respond only in JSON format:
10
12
  {{"result": ["question1", "question2", ...], "reason": "string"}}
11
- Here is the text:
13
+
14
+ Here is the subject:
12
15
  {text}
13
16
 
14
17
  analyze_template: |
@@ -18,5 +21,6 @@ analyze_template: |
18
21
  We need a summarized analysis of the subject.
19
22
  What is the subject about?
20
23
  What points of view can we see and generate questions from it? (Questions that real users might have.)
24
+
21
25
  Here is the subject:
22
26
  {text}
@@ -1,8 +1,10 @@
1
1
  main_template: |
2
2
  You are a summarizer.
3
3
  You must summarize the given text, preserving its meaning.
4
+
4
5
  Respond only in JSON format:
5
6
  {{"result": "string"}}
7
+
6
8
  Provide a concise summary of the following text:
7
9
  {text}
8
10
 
@@ -10,5 +12,7 @@ main_template: |
10
12
  analyze_template: |
11
13
  Read the following text and identify its main points, key arguments, and overall purpose.
12
14
  Provide a brief, summarized analysis that will help in generating an accurate and concise summary.
15
+
16
+ Here is the text:
13
17
  {text}
14
18
 
@@ -3,11 +3,14 @@ main_template: |
3
3
  Given the following answer, generate {number_of_questions} appropriate questions that this answer would directly respond to.
4
4
  The generated answer should be independently meaningful,
5
5
  and not mentioning any verbs like, this, that, he or she on the question.
6
+
6
7
  There is a `reason` key, fill that up with a summarized version of your thoughts.
7
8
  The `reason` must be less than 20 words.
8
9
  Don't forget to fill the reason.
10
+
9
11
  Respond only in JSON format:
10
12
  {{"result": ["question1", "question2", ...], "reason": "string"}}
13
+
11
14
  Here is the answer:
12
15
  {text}
13
16
 
@@ -17,6 +20,7 @@ analyze_template: |
17
20
  Provide a brief, summarized understanding of the answer's content that will
18
21
  help in formulating relevant and direct questions.
19
22
  Just mention the key points that were provided in the answer
23
+
20
24
  Here is the answer:
21
25
  {text}
22
26