hamtaa-texttools 1.1.22__tar.gz → 1.1.23__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {hamtaa_texttools-1.1.22/hamtaa_texttools.egg-info → hamtaa_texttools-1.1.23}/PKG-INFO +1 -1
- {hamtaa_texttools-1.1.22 → hamtaa_texttools-1.1.23/hamtaa_texttools.egg-info}/PKG-INFO +1 -1
- {hamtaa_texttools-1.1.22 → hamtaa_texttools-1.1.23}/pyproject.toml +1 -1
- {hamtaa_texttools-1.1.22 → hamtaa_texttools-1.1.23}/tests/test_all_async_tools.py +2 -5
- {hamtaa_texttools-1.1.22 → hamtaa_texttools-1.1.23}/tests/test_all_tools.py +3 -10
- {hamtaa_texttools-1.1.22 → hamtaa_texttools-1.1.23}/tests/test_output_validation.py +2 -6
- {hamtaa_texttools-1.1.22 → hamtaa_texttools-1.1.23}/texttools/batch/batch_config.py +2 -1
- {hamtaa_texttools-1.1.22 → hamtaa_texttools-1.1.23}/texttools/batch/batch_manager.py +6 -6
- {hamtaa_texttools-1.1.22 → hamtaa_texttools-1.1.23}/texttools/batch/batch_runner.py +6 -6
- {hamtaa_texttools-1.1.22 → hamtaa_texttools-1.1.23}/texttools/internals/async_operator.py +9 -12
- {hamtaa_texttools-1.1.22 → hamtaa_texttools-1.1.23}/texttools/internals/exceptions.py +0 -6
- {hamtaa_texttools-1.1.22 → hamtaa_texttools-1.1.23}/texttools/internals/operator_utils.py +0 -3
- {hamtaa_texttools-1.1.22 → hamtaa_texttools-1.1.23}/texttools/internals/prompt_loader.py +0 -5
- {hamtaa_texttools-1.1.22 → hamtaa_texttools-1.1.23}/texttools/internals/sync_operator.py +9 -12
- {hamtaa_texttools-1.1.22 → hamtaa_texttools-1.1.23}/texttools/prompts/categorize.yaml +3 -2
- {hamtaa_texttools-1.1.22 → hamtaa_texttools-1.1.23}/texttools/prompts/check_fact.yaml +5 -0
- {hamtaa_texttools-1.1.22 → hamtaa_texttools-1.1.23}/texttools/prompts/extract_entities.yaml +4 -0
- {hamtaa_texttools-1.1.22 → hamtaa_texttools-1.1.23}/texttools/prompts/extract_keywords.yaml +15 -3
- {hamtaa_texttools-1.1.22 → hamtaa_texttools-1.1.23}/texttools/prompts/is_question.yaml +4 -0
- {hamtaa_texttools-1.1.22 → hamtaa_texttools-1.1.23}/texttools/prompts/merge_questions.yaml +8 -1
- {hamtaa_texttools-1.1.22 → hamtaa_texttools-1.1.23}/texttools/prompts/propositionize.yaml +2 -0
- {hamtaa_texttools-1.1.22 → hamtaa_texttools-1.1.23}/texttools/prompts/rewrite.yaml +3 -4
- {hamtaa_texttools-1.1.22 → hamtaa_texttools-1.1.23}/texttools/prompts/subject_to_question.yaml +5 -1
- {hamtaa_texttools-1.1.22 → hamtaa_texttools-1.1.23}/texttools/prompts/summarize.yaml +4 -0
- {hamtaa_texttools-1.1.22 → hamtaa_texttools-1.1.23}/texttools/prompts/text_to_question.yaml +4 -0
- {hamtaa_texttools-1.1.22 → hamtaa_texttools-1.1.23}/texttools/prompts/translate.yaml +5 -0
- {hamtaa_texttools-1.1.22 → hamtaa_texttools-1.1.23}/texttools/tools/async_tools.py +87 -101
- {hamtaa_texttools-1.1.22 → hamtaa_texttools-1.1.23}/texttools/tools/sync_tools.py +87 -102
- {hamtaa_texttools-1.1.22 → hamtaa_texttools-1.1.23}/LICENSE +0 -0
- {hamtaa_texttools-1.1.22 → hamtaa_texttools-1.1.23}/MANIFEST.in +0 -0
- {hamtaa_texttools-1.1.22 → hamtaa_texttools-1.1.23}/README.md +0 -0
- {hamtaa_texttools-1.1.22 → hamtaa_texttools-1.1.23}/hamtaa_texttools.egg-info/SOURCES.txt +0 -0
- {hamtaa_texttools-1.1.22 → hamtaa_texttools-1.1.23}/hamtaa_texttools.egg-info/dependency_links.txt +0 -0
- {hamtaa_texttools-1.1.22 → hamtaa_texttools-1.1.23}/hamtaa_texttools.egg-info/requires.txt +0 -0
- {hamtaa_texttools-1.1.22 → hamtaa_texttools-1.1.23}/hamtaa_texttools.egg-info/top_level.txt +0 -0
- {hamtaa_texttools-1.1.22 → hamtaa_texttools-1.1.23}/setup.cfg +0 -0
- {hamtaa_texttools-1.1.22 → hamtaa_texttools-1.1.23}/texttools/__init__.py +0 -0
- {hamtaa_texttools-1.1.22 → hamtaa_texttools-1.1.23}/texttools/internals/models.py +0 -0
- {hamtaa_texttools-1.1.22 → hamtaa_texttools-1.1.23}/texttools/internals/text_to_chunks.py +0 -0
- {hamtaa_texttools-1.1.22 → hamtaa_texttools-1.1.23}/texttools/prompts/README.md +0 -0
- {hamtaa_texttools-1.1.22 → hamtaa_texttools-1.1.23}/texttools/prompts/run_custom.yaml +0 -0
{hamtaa_texttools-1.1.22/hamtaa_texttools.egg-info → hamtaa_texttools-1.1.23}/PKG-INFO

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: hamtaa-texttools
-Version: 1.1.22
+Version: 1.1.23
 Summary: A high-level NLP toolkit built on top of modern LLMs.
 Author-email: Tohidi <the.mohammad.tohidi@gmail.com>, Montazer <montazerh82@gmail.com>, Givechi <mohamad.m.givechi@gmail.com>, MoosaviNejad <erfanmoosavi84@gmail.com>, Zareshahi <a.zareshahi1377@gmail.com>
 License: MIT License

{hamtaa_texttools-1.1.22 → hamtaa_texttools-1.1.23/hamtaa_texttools.egg-info}/PKG-INFO

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: hamtaa-texttools
-Version: 1.1.22
+Version: 1.1.23
 Summary: A high-level NLP toolkit built on top of modern LLMs.
 Author-email: Tohidi <the.mohammad.tohidi@gmail.com>, Montazer <montazerh82@gmail.com>, Givechi <mohamad.m.givechi@gmail.com>, MoosaviNejad <erfanmoosavi84@gmail.com>, Zareshahi <a.zareshahi1377@gmail.com>
 License: MIT License

{hamtaa_texttools-1.1.22 → hamtaa_texttools-1.1.23}/tests/test_all_async_tools.py

@@ -6,16 +6,13 @@ from openai import AsyncOpenAI
 
 from texttools import AsyncTheTool
 
-# Load environment variables from .env
 load_dotenv()
-
+OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
 BASE_URL = os.getenv("BASE_URL")
 MODEL = os.getenv("MODEL")
 
-
-client = AsyncOpenAI(base_url=BASE_URL, api_key=API_KEY)
+client = AsyncOpenAI(base_url=BASE_URL, api_key=OPENAI_API_KEY)
 
-# Create an instance of TheTool
 t = AsyncTheTool(client=client, model=MODEL)
 
 

{hamtaa_texttools-1.1.22 → hamtaa_texttools-1.1.23}/tests/test_all_tools.py

@@ -6,24 +6,18 @@ from pydantic import BaseModel
 
 from texttools import TheTool, CategoryTree
 
-# Load environment variables from .env
 load_dotenv()
-
+OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
 BASE_URL = os.getenv("BASE_URL")
 MODEL = os.getenv("MODEL")
 
-
-client = OpenAI(base_url=BASE_URL, api_key=API_KEY)
+client = OpenAI(base_url=BASE_URL, api_key=OPENAI_API_KEY)
 
-# Create an instance of TheTool
 t = TheTool(client=client, model=MODEL)
 
 # Categorizer: list mode
 category = t.categorize(
-    "سلام حالت چطوره؟",
-    categories=["هیچکدام", "دینی", "فلسفه"],
-    logprobs=True,
-    top_logprobs=3,
+    "سلام حالت چطوره؟", categories=["هیچکدام", "دینی", "فلسفه"], priority=3
 )
 print(repr(category))
 
@@ -56,7 +50,6 @@ entities = t.extract_entities(
     "Ali will be dead by the car crash",
     entities=["EVENT"],
     with_analysis=True,
-    logprobs=True,
 )
 print(repr(entities))
 
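Note on the change above: in 1.1.23 the example calls drop `logprobs`/`top_logprobs` and pass `priority` instead, and the client is built from `OPENAI_API_KEY` rather than the previously undefined `API_KEY`. A minimal usage sketch assembled only from the test code in this diff (the `.env` variable names and the `priority=3` value come from the test itself, not from separate documentation):

    import os
    from dotenv import load_dotenv
    from openai import OpenAI
    from texttools import TheTool

    load_dotenv()  # expects OPENAI_API_KEY, BASE_URL and MODEL in .env
    client = OpenAI(base_url=os.getenv("BASE_URL"), api_key=os.getenv("OPENAI_API_KEY"))
    t = TheTool(client=client, model=os.getenv("MODEL"))

    # Categorizer: list mode, now with a priority hint instead of logprobs
    category = t.categorize(
        "سلام حالت چطوره؟", categories=["هیچکدام", "دینی", "فلسفه"], priority=3
    )
    print(repr(category))
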
{hamtaa_texttools-1.1.22 → hamtaa_texttools-1.1.23}/tests/test_output_validation.py

@@ -6,16 +6,13 @@ from openai import OpenAI
 
 from texttools import TheTool
 
-# Load environment variables from .env
 load_dotenv()
-
+OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
 BASE_URL = os.getenv("BASE_URL")
 MODEL = os.getenv("MODEL")
 
-
-client = OpenAI(base_url=BASE_URL, api_key=API_KEY)
+client = OpenAI(base_url=BASE_URL, api_key=OPENAI_API_KEY)
 
-# Create an instance of TheTool
 t = TheTool(client=client, model=MODEL)
 
 
@@ -24,7 +21,6 @@ def validate(result: Any) -> bool:
     return "چیست؟" not in result
 
 
-# Question from Text Generator
 question = t.text_to_question(
     "زندگی",
     output_lang="Persian",

{hamtaa_texttools-1.1.22 → hamtaa_texttools-1.1.23}/texttools/batch/batch_config.py

@@ -1,3 +1,4 @@
+from typing import Any
 from dataclasses import dataclass
 from collections.abc import Callable
 
@@ -10,7 +11,7 @@ def export_data(data) -> list[dict[str, str]]:
     return data
 
 
-def import_data(data) ->
+def import_data(data) -> Any:
     """
     Takes the output and adds and aggregates it to the original structure.
     """

{hamtaa_texttools-1.1.22 → hamtaa_texttools-1.1.23}/texttools/batch/batch_manager.py

@@ -1,7 +1,7 @@
 import json
 import uuid
 from pathlib import Path
-from typing import Type, TypeVar
+from typing import Type, TypeVar, Any
 import logging
 
 from pydantic import BaseModel
@@ -31,7 +31,7 @@ class BatchManager:
         prompt_template: str,
         state_dir: Path = Path(".batch_jobs"),
         custom_json_schema_obj_str: dict | None = None,
-        **client_kwargs:
+        **client_kwargs: Any,
     ):
         self._client = client
         self._model = model
@@ -51,7 +51,7 @@ class BatchManager:
     def _state_file(self, job_name: str) -> Path:
         return self._state_dir / f"{job_name}.json"
 
-    def _load_state(self, job_name: str) -> list[dict[str,
+    def _load_state(self, job_name: str) -> list[dict[str, Any]]:
         """
         Loads the state (job information) from the state file for the given job name.
         Returns an empty list if the state file does not exist.
@@ -62,7 +62,7 @@ class BatchManager:
                 return json.load(f)
         return []
 
-    def _save_state(self, job_name: str, jobs: list[dict[str,
+    def _save_state(self, job_name: str, jobs: list[dict[str, Any]]) -> None:
         """
         Saves the job state to the state file for the given job name.
         """
@@ -77,11 +77,11 @@ class BatchManager:
         if path.exists():
             path.unlink()
 
-    def _build_task(self, text: str, idx: str) -> dict[str,
+    def _build_task(self, text: str, idx: str) -> dict[str, Any]:
         """
         Builds a single task dictionary for the batch job, including the prompt, model, and response format configuration.
         """
-        response_format_config: dict[str,
+        response_format_config: dict[str, Any]
 
         if self._custom_json_schema_obj_str:
             response_format_config = {
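`_build_task` is now annotated as returning `dict[str, Any]`; its body is not included in this diff. For orientation only, a single task entry for OpenAI's Batch API (`/v1/chat/completions`) generally has the shape sketched below. Treat it as a hypothetical illustration: the use of `idx` as `custom_id` and the way the prompt template is filled are assumptions, not code from the package.

    from typing import Any

    def build_task_sketch(idx: str, text: str, model: str, prompt_template: str) -> dict[str, Any]:
        # Hypothetical sketch of a batch task dict, not BatchManager._build_task itself.
        return {
            "custom_id": idx,  # assumed: the idx argument becomes the batch custom_id
            "method": "POST",
            "url": "/v1/chat/completions",
            "body": {
                "model": model,
                "messages": [{"role": "user", "content": prompt_template.format(text=text)}],
                # a custom json_schema response_format would replace this when one is supplied
                "response_format": {"type": "json_object"},
            },
        }
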
{hamtaa_texttools-1.1.22 → hamtaa_texttools-1.1.23}/texttools/batch/batch_runner.py

@@ -2,7 +2,7 @@ import json
 import os
 import time
 from pathlib import Path
-from typing import Type, TypeVar
+from typing import Type, TypeVar, Any
 import logging
 
 from dotenv import load_dotenv
@@ -12,7 +12,7 @@ from pydantic import BaseModel
 from texttools.batch.batch_manager import BatchManager
 from texttools.batch.batch_config import BatchConfig
 from texttools.internals.models import Str
-from texttools.internals.exceptions import TextToolsError
+from texttools.internals.exceptions import TextToolsError
 
 # Base Model type for output models
 T = TypeVar("T", bound=BaseModel)
@@ -38,7 +38,7 @@ class BatchRunner:
         self._output_model = output_model
         self._manager = self._init_manager()
         self._data = self._load_data()
-        self._parts: list[list[dict[str,
+        self._parts: list[list[dict[str, Any]]] = []
         # Map part index to job name
         self._part_idx_to_job_name: dict[int, str] = {}
         # Track retry attempts per part
@@ -47,7 +47,7 @@ class BatchRunner:
             Path(self._config.BASE_OUTPUT_DIR).mkdir(parents=True, exist_ok=True)
 
         except Exception as e:
-            raise
+            raise TextToolsError(f"Batch runner initialization failed: {e}")
 
     def _init_manager(self) -> BatchManager:
         load_dotenv()
@@ -130,8 +130,8 @@ class BatchRunner:
 
     def _save_results(
         self,
-        output_data: list[dict[str,
-        log: list[
+        output_data: list[dict[str, Any]] | dict[str, Any],
+        log: list[Any],
         part_idx: int,
     ):
         part_suffix = f"_part_{part_idx + 1}" if len(self._parts) > 1 else ""

{hamtaa_texttools-1.1.22 → hamtaa_texttools-1.1.23}/texttools/internals/async_operator.py

@@ -1,4 +1,4 @@
-from typing import TypeVar, Type
+from typing import TypeVar, Type, Any
 from collections.abc import Callable
 
 from openai import AsyncOpenAI
@@ -56,11 +56,11 @@ class AsyncOperator:
         temperature: float,
         logprobs: bool,
         top_logprobs: int,
-        priority: int,
-    ) -> tuple[T,
+        priority: int | None,
+    ) -> tuple[T, Any]:
         """
         Parses a chat completion using OpenAI's structured output format.
-        Returns both the parsed
+        Returns both the parsed Any and the raw completion for logprobs.
         """
         try:
             request_kwargs = {
@@ -74,7 +74,7 @@ class AsyncOperator:
                 request_kwargs["logprobs"] = True
                 request_kwargs["top_logprobs"] = top_logprobs
 
-            if priority:
+            if priority is not None:
                 request_kwargs["extra_body"] = {"priority": priority}
 
             completion = await self._client.beta.chat.completions.parse(
@@ -106,11 +106,11 @@ class AsyncOperator:
         temperature: float,
         logprobs: bool,
         top_logprobs: int,
-        validator: Callable[[
+        validator: Callable[[Any], bool] | None,
         max_validation_retries: int | None,
-        priority: int,
+        priority: int | None,
         # Internal parameters
-
+        tool_name: str,
         output_model: Type[T],
         mode: str | None,
         **extra_kwargs,
@@ -119,12 +119,9 @@ class AsyncOperator:
         Execute the LLM pipeline with the given input text.
         """
         try:
-            if logprobs and (not isinstance(top_logprobs, int) or top_logprobs < 2):
-                raise ValueError("top_logprobs should be an int greater than 1")
-
             prompt_loader = PromptLoader()
             prompt_configs = prompt_loader.load(
-                prompt_file=
+                prompt_file=tool_name + ".yaml",
                 text=text.strip(),
                 mode=mode,
                 **extra_kwargs,
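The guard around `extra_body` changed from a truthiness test to an explicit `None` check. The practical difference is plain Python behaviour, independent of this package: `priority=0` was silently dropped by the old check and is forwarded by the new one, while only an unset (`None`) priority is skipped.

    # Old check: 0 is falsy, so extra_body was never set for priority=0.
    priority = 0
    request_kwargs = {}
    if priority:
        request_kwargs["extra_body"] = {"priority": priority}
    assert "extra_body" not in request_kwargs

    # New check: 0 is forwarded; only None is skipped.
    if priority is not None:
        request_kwargs["extra_body"] = {"priority": priority}
    assert request_kwargs["extra_body"] == {"priority": 0}
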
{hamtaa_texttools-1.1.22 → hamtaa_texttools-1.1.23}/texttools/internals/prompt_loader.py

@@ -8,11 +8,6 @@ from texttools.internals.exceptions import PromptError
 class PromptLoader:
     """
     Utility for loading and formatting YAML prompt templates.
-
-    Responsibilities:
-    - Load and parse YAML prompt definitions.
-    - Select the right template (by mode, if applicable).
-    - Inject variables (`{text}`, plus any extra kwargs) into the templates.
     """
 
     MAIN_TEMPLATE = "main_template"

{hamtaa_texttools-1.1.22 → hamtaa_texttools-1.1.23}/texttools/internals/sync_operator.py

@@ -1,4 +1,4 @@
-from typing import TypeVar, Type
+from typing import TypeVar, Type, Any
 from collections.abc import Callable
 
 from openai import OpenAI
@@ -56,11 +56,11 @@ class Operator:
         temperature: float,
         logprobs: bool,
         top_logprobs: int,
-        priority: int,
-    ) -> tuple[T,
+        priority: int | None,
+    ) -> tuple[T, Any]:
         """
         Parses a chat completion using OpenAI's structured output format.
-        Returns both the parsed
+        Returns both the parsed Any and the raw completion for logprobs.
         """
         try:
             request_kwargs = {
@@ -74,7 +74,7 @@ class Operator:
                 request_kwargs["logprobs"] = True
                 request_kwargs["top_logprobs"] = top_logprobs
 
-            if priority:
+            if priority is not None:
                 request_kwargs["extra_body"] = {"priority": priority}
 
             completion = self._client.beta.chat.completions.parse(**request_kwargs)
@@ -104,11 +104,11 @@ class Operator:
         temperature: float,
         logprobs: bool,
         top_logprobs: int,
-        validator: Callable[[
+        validator: Callable[[Any], bool] | None,
         max_validation_retries: int | None,
-        priority: int,
+        priority: int | None,
         # Internal parameters
-
+        tool_name: str,
         output_model: Type[T],
         mode: str | None,
         **extra_kwargs,
@@ -117,12 +117,9 @@ class Operator:
         Execute the LLM pipeline with the given input text.
         """
         try:
-            if logprobs and (not isinstance(top_logprobs, int) or top_logprobs < 2):
-                raise ValueError("top_logprobs should be an int greater than 1")
-
             prompt_loader = PromptLoader()
             prompt_configs = prompt_loader.load(
-                prompt_file=
+                prompt_file=tool_name + ".yaml",
                 text=text.strip(),
                 mode=mode,
                 **extra_kwargs,

{hamtaa_texttools-1.1.22 → hamtaa_texttools-1.1.23}/texttools/prompts/categorize.yaml

@@ -23,7 +23,7 @@ main_template: |
   Available categories with their descriptions:
   {category_list}
 
-
+  Here is the text:
   {text}
 
 analyze_template: |
@@ -31,5 +31,6 @@ analyze_template: |
   To improve categorization, we need an analysis of the text.
   Analyze the given text and write its main idea and a short analysis of that.
   Analysis should be very short.
-
+
+  Here is the text:
   {text}

{hamtaa_texttools-1.1.22 → hamtaa_texttools-1.1.23}/texttools/prompts/check_fact.yaml

@@ -2,10 +2,13 @@ main_template: |
   You are an expert in determining whether a statement can be concluded from the source text or not.
   You must return a boolean value: True or False.
   Return True if the statement can be concluded from the source, and False otherwise.
+
   Respond only in JSON format (Output should be a boolean):
   {{"result": True/False}}
+
   The statement is:
   {text}
+
   The source text is:
   {source_text}
 
@@ -13,7 +16,9 @@ analyze_template: |
   You should analyze a statement and a source text and provide a brief,
   summarized analysis that could help in determining that can the statement
   be concluded from the source or not.
+
   The statement is:
   {text}
+
   The source text is:
   {source_text}

{hamtaa_texttools-1.1.22 → hamtaa_texttools-1.1.23}/texttools/prompts/extract_entities.yaml

@@ -2,6 +2,7 @@ main_template: |
   You are a Named Entity Recognition (NER) extractor.
   Identify and extract {entities} from the given text.
   For each entity, provide its text and a clear type.
+
   Respond only in JSON format:
   {{
     "result": [
@@ -11,10 +12,13 @@ main_template: |
       }}
     ]
   }}
+
   Here is the text:
   {text}
 
 analyze_template: |
   Read the following text and identify any proper nouns, key concepts, or specific mentions that might represent named entities.
   Provide a brief, summarized analysis that could help in categorizing these entities.
+
+  Here is the text:
   {text}

{hamtaa_texttools-1.1.22 → hamtaa_texttools-1.1.23}/texttools/prompts/extract_keywords.yaml

@@ -3,14 +3,17 @@ main_template:
   auto: |
     You are an expert keyword extractor.
     Extract the most relevant keywords from the given text.
+
     Guidelines:
     - Keywords must represent the main concepts of the text.
    - If two words have overlapping meanings, choose only one.
     - Do not include generic or unrelated words.
     - Keywords must be single, self-contained words (no phrases).
     - Output between 3 and 7 keywords based on the input length.
-
+
+    Respond only in JSON format:
     {{"result": ["keyword1", "keyword2", etc.]}}
+
     Here is the text:
     {text}
 
@@ -29,8 +32,10 @@ main_template:
     - Short texts (a few sentences): 3 keywords
     - Medium texts (1–4 paragraphs): 4–5 keywords
     - Long texts (more than 4 paragraphs): 6–7 keywords
-
+
+    Respond only in JSON format:
     {{"result": ["keyword1", "keyword2", etc.]}}
+
     Here is the text:
     {text}
 
@@ -45,7 +50,8 @@ main_template:
     - If the text doesn't contain enough distinct keywords, include the most relevant ones even if some are less specific.
     - Keywords must be single words (no multi-word expressions).
     - Order keywords by relevance (most relevant first).
-
+
+    Respond only in JSON format:
     {{"result": ["keyword1", "keyword2", "keyword3", ...]}}
 
     Here is the text:
@@ -55,14 +61,20 @@ analyze_template:
   auto: |
     Analyze the following text to identify its main topics, concepts, and important terms.
     Provide a concise summary of your findings that will help in extracting relevant keywords.
+
+    Here is the text:
     {text}
 
   threshold: |
     Analyze the following text to identify its main topics, concepts, and important terms.
     Provide a concise summary of your findings that will help in extracting relevant keywords.
+
+    Here is the text:
     {text}
 
   count: |
     Analyze the following text to identify its main topics, concepts, and important terms.
     Provide a concise summary of your findings that will help in extracting relevant keywords.
+
+    Here is the text:
     {text}

{hamtaa_texttools-1.1.22 → hamtaa_texttools-1.1.23}/texttools/prompts/is_question.yaml

@@ -1,8 +1,10 @@
 main_template: |
   You are a question detector.
   Determine that if the given text contains any question or not.
+
   Respond only in JSON format (Output should be a boolean):
   {{"result": True/False}}
+
   Here is the text:
   {text}
 
@@ -10,5 +12,7 @@ analyze_template: |
   We want to analyze this text snippet to see if it contains any question or request of some kind or not.
   Read the text, and reason about it being a request or not.
   Summerized, short answer.
+
+  Here is the text:
   {text}
 
{hamtaa_texttools-1.1.22 → hamtaa_texttools-1.1.23}/texttools/prompts/merge_questions.yaml

@@ -4,13 +4,16 @@ main_template:
     You are a language expert.
     I will give you a list of questions that are semantically similar.
     Your task is to merge them into one unified question.
+
     Guidelines:
     - Preserves all the information and intent from the original questions.
     - Sounds natural, fluent, and concise.
     - Avoids redundancy or unnecessary repetition.
     - Does not omit any unique idea from the originals.
-
+
+    Respond only in JSON format:
     {{"result": "string"}}
+
     Here is the questions:
     {text}
 
@@ -20,8 +23,10 @@ main_template:
     Then, write one merged question that combines all their content clearly and naturally, without redundancy.
     Step 1: Extract key ideas.
     Step 2: Write the final merged question.
+
     Respond only in JSON format:
     {{"result": "string"}}
+
     Here is the questions:
     {text}
 
@@ -33,6 +38,7 @@ analyze_template:
     and the specific information they are seeking.
     Provide a brief, summarized understanding of the questions' meaning that
     will help in merging and rephrasing it accurately without changing its intent.
+
     Here is the question:
     {text}
 
@@ -41,6 +47,7 @@ analyze_template:
     and the literal meaning it conveys.
     Provide a brief, summarized analysis of their linguistic structure and current meaning,
     which will then be used to create a new question containing all of their contents.
+
     Here is the question:
     {text}
 
{hamtaa_texttools-1.1.22 → hamtaa_texttools-1.1.23}/texttools/prompts/propositionize.yaml

@@ -19,4 +19,6 @@ analyze_template: |
   An atomic proposition is a single, self-contained fact that is concise,
   verifiable, and does not rely on external context.
   You just have to think around the possible propositions in the text and how a proposition can be made.
+
+  Here is the text:
   {text}

{hamtaa_texttools-1.1.22 → hamtaa_texttools-1.1.23}/texttools/prompts/rewrite.yaml

@@ -52,7 +52,6 @@ main_template:
     - Make it Challenging: The difference should be subtle enough that it requires a deep understanding of the text to identify, not just a simple keyword mismatch.
     - Maintain Similar Length: The generated sentence should be of roughly the same length and level of detail as the Anchor.
 
-
     Respond only in JSON format:
     {{"result": "str"}}
 
@@ -73,7 +72,7 @@ analyze_template:
 
     Your analysis should capture the ESSENTIAL MEANING that must be preserved in any paraphrase.
 
-
+    Here is the text:
     {text}
 
   negative: |
@@ -87,7 +86,7 @@ analyze_template:
 
     The goal is to find topics that are in the same domain but semantically unrelated to this specific text.
 
-
+    Here is the text:
     {text}
 
   hard_negative: |
@@ -106,6 +105,6 @@ analyze_template:
     - Sentence structure
     - 80-90% of the vocabulary
 
-
+    Here is the text:
     {text}
 
{hamtaa_texttools-1.1.22 → hamtaa_texttools-1.1.23}/texttools/prompts/subject_to_question.yaml
RENAMED

@@ -3,12 +3,15 @@ main_template: |
   Given the following subject, generate {number_of_questions} appropriate questions that this subject would directly respond to.
   The generated subject should be independently meaningful,
   and it must not mention any verbs like, this, that, he or she and etc. in the question.
+
   There is a `reason` key, fill that up with a summerized version of your thoughts.
   The `reason` must be less than 20 words.
   Don't forget to fill the reason.
+
   Respond only in JSON format:
   {{"result": ["question1", "question2", ...], "reason": "string"}}
-
+
+  Here is the subject:
   {text}
 
 analyze_template: |
@@ -18,5 +21,6 @@ analyze_template: |
   We need a summerized analysis of the subject.
   What is the subject about?
   What point of views can we see and generate questoins from it? (Questions that real users might have.)
+
   Here is the subject:
   {text}

{hamtaa_texttools-1.1.22 → hamtaa_texttools-1.1.23}/texttools/prompts/summarize.yaml

@@ -1,8 +1,10 @@
 main_template: |
   You are a summarizer.
   You must summarize the given text, preserving its meaning.
+
   Respond only in JSON format:
   {{"result": "string"}}
+
   Provide a concise summary of the following text:
   {text}
 
@@ -10,5 +12,7 @@ main_template: |
 analyze_template: |
   Read the following text and identify its main points, key arguments, and overall purpose.
   Provide a brief, summarized analysis that will help in generating an accurate and concise summary.
+
+  Here is the text:
   {text}
 
{hamtaa_texttools-1.1.22 → hamtaa_texttools-1.1.23}/texttools/prompts/text_to_question.yaml

@@ -3,11 +3,14 @@ main_template: |
   Given the following answer, generate {number_of_questions} appropriate questions that this answer would directly respond to.
   The generated answer should be independently meaningful,
   and not mentioning any verbs like, this, that, he or she on the question.
+
   There is a `reason` key, fill that up with a summerized version of your thoughts.
   The `reason` must be less than 20 words.
   Don't forget to fill the reason.
+
   Respond only in JSON format:
   {{"result": ["question1", "question2", ...], "reason": "string"}}
+
   Here is the answer:
   {text}
 
@@ -17,6 +20,7 @@ analyze_template: |
   Provide a brief, summarized understanding of the answer's content that will
   help in formulating relevant and direct questions.
   Just mention the keypoints that was provided in the answer
+
   Here is the answer:
   {text}
 