hamtaa-texttools 0.1.43__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of hamtaa-texttools might be problematic. Click here for more details.

Files changed (67) hide show
  1. hamtaa_texttools-0.1.43/PKG-INFO +60 -0
  2. hamtaa_texttools-0.1.43/README.md +50 -0
  3. hamtaa_texttools-0.1.43/hamtaa_texttools.egg-info/PKG-INFO +60 -0
  4. hamtaa_texttools-0.1.43/hamtaa_texttools.egg-info/SOURCES.txt +65 -0
  5. hamtaa_texttools-0.1.43/hamtaa_texttools.egg-info/dependency_links.txt +1 -0
  6. hamtaa_texttools-0.1.43/hamtaa_texttools.egg-info/requires.txt +2 -0
  7. hamtaa_texttools-0.1.43/hamtaa_texttools.egg-info/top_level.txt +1 -0
  8. hamtaa_texttools-0.1.43/pyproject.toml +24 -0
  9. hamtaa_texttools-0.1.43/setup.cfg +4 -0
  10. hamtaa_texttools-0.1.43/setup.py +4 -0
  11. hamtaa_texttools-0.1.43/tests/test_vllm_output.py +79 -0
  12. hamtaa_texttools-0.1.43/texttools/__init__.py +26 -0
  13. hamtaa_texttools-0.1.43/texttools/base/__init__.py +3 -0
  14. hamtaa_texttools-0.1.43/texttools/base/base_categorizer.py +40 -0
  15. hamtaa_texttools-0.1.43/texttools/base/base_keyword_extractor.py +35 -0
  16. hamtaa_texttools-0.1.43/texttools/base/base_ner_extractor.py +61 -0
  17. hamtaa_texttools-0.1.43/texttools/base/base_question_detector.py +35 -0
  18. hamtaa_texttools-0.1.43/texttools/base/base_question_generator.py +99 -0
  19. hamtaa_texttools-0.1.43/texttools/base/base_question_merger.py +59 -0
  20. hamtaa_texttools-0.1.43/texttools/base/base_question_rewriter.py +61 -0
  21. hamtaa_texttools-0.1.43/texttools/base/base_router.py +33 -0
  22. hamtaa_texttools-0.1.43/texttools/base/base_summarizer.py +55 -0
  23. hamtaa_texttools-0.1.43/texttools/base/base_task_performer.py +53 -0
  24. hamtaa_texttools-0.1.43/texttools/base/base_translator.py +38 -0
  25. hamtaa_texttools-0.1.43/texttools/batch_manager/__init__.py +2 -0
  26. hamtaa_texttools-0.1.43/texttools/batch_manager/batch_manager.py +241 -0
  27. hamtaa_texttools-0.1.43/texttools/batch_manager/batch_runner.py +207 -0
  28. hamtaa_texttools-0.1.43/texttools/formatter/__init__.py +1 -0
  29. hamtaa_texttools-0.1.43/texttools/formatter/base.py +26 -0
  30. hamtaa_texttools-0.1.43/texttools/formatter/gemma3_formatter.py +51 -0
  31. hamtaa_texttools-0.1.43/texttools/handlers/__init__.py +6 -0
  32. hamtaa_texttools-0.1.43/texttools/handlers/categorizer/__init__.py +6 -0
  33. hamtaa_texttools-0.1.43/texttools/handlers/categorizer/categorizer.py +61 -0
  34. hamtaa_texttools-0.1.43/texttools/handlers/handlers.py +88 -0
  35. hamtaa_texttools-0.1.43/texttools/tools/__init__.py +33 -0
  36. hamtaa_texttools-0.1.43/texttools/tools/categorizer/__init__.py +2 -0
  37. hamtaa_texttools-0.1.43/texttools/tools/categorizer/encoder_model/__init__.py +1 -0
  38. hamtaa_texttools-0.1.43/texttools/tools/categorizer/encoder_model/encoder_vectorizer.py +51 -0
  39. hamtaa_texttools-0.1.43/texttools/tools/categorizer/llm/__init__.py +2 -0
  40. hamtaa_texttools-0.1.43/texttools/tools/categorizer/llm/gemma_categorizer.py +169 -0
  41. hamtaa_texttools-0.1.43/texttools/tools/categorizer/llm/openai_categorizer.py +80 -0
  42. hamtaa_texttools-0.1.43/texttools/tools/keyword_extractor/__init__.py +1 -0
  43. hamtaa_texttools-0.1.43/texttools/tools/keyword_extractor/gemma_extractor.py +138 -0
  44. hamtaa_texttools-0.1.43/texttools/tools/merger/__init__.py +2 -0
  45. hamtaa_texttools-0.1.43/texttools/tools/merger/gemma_question_merger.py +214 -0
  46. hamtaa_texttools-0.1.43/texttools/tools/ner/__init__.py +1 -0
  47. hamtaa_texttools-0.1.43/texttools/tools/ner/gemma_ner_extractor.py +157 -0
  48. hamtaa_texttools-0.1.43/texttools/tools/question_detector/__init__.py +2 -0
  49. hamtaa_texttools-0.1.43/texttools/tools/question_detector/gemma_detector.py +130 -0
  50. hamtaa_texttools-0.1.43/texttools/tools/question_detector/llm_detector.py +112 -0
  51. hamtaa_texttools-0.1.43/texttools/tools/question_generator/__init__.py +1 -0
  52. hamtaa_texttools-0.1.43/texttools/tools/question_generator/gemma_question_generator.py +198 -0
  53. hamtaa_texttools-0.1.43/texttools/tools/reranker/__init__.py +3 -0
  54. hamtaa_texttools-0.1.43/texttools/tools/reranker/reranker.py +137 -0
  55. hamtaa_texttools-0.1.43/texttools/tools/reranker/scorer.py +216 -0
  56. hamtaa_texttools-0.1.43/texttools/tools/reranker/sorter.py +278 -0
  57. hamtaa_texttools-0.1.43/texttools/tools/rewriter/__init__.py +2 -0
  58. hamtaa_texttools-0.1.43/texttools/tools/rewriter/gemma_question_rewriter.py +213 -0
  59. hamtaa_texttools-0.1.43/texttools/tools/router/__init__.py +0 -0
  60. hamtaa_texttools-0.1.43/texttools/tools/router/gemma_router.py +169 -0
  61. hamtaa_texttools-0.1.43/texttools/tools/subject_to_question/__init__.py +1 -0
  62. hamtaa_texttools-0.1.43/texttools/tools/subject_to_question/gemma_question_generator.py +224 -0
  63. hamtaa_texttools-0.1.43/texttools/tools/summarizer/__init__.py +2 -0
  64. hamtaa_texttools-0.1.43/texttools/tools/summarizer/gemma_summarizer.py +140 -0
  65. hamtaa_texttools-0.1.43/texttools/tools/summarizer/llm_summerizer.py +108 -0
  66. hamtaa_texttools-0.1.43/texttools/tools/translator/__init__.py +1 -0
  67. hamtaa_texttools-0.1.43/texttools/tools/translator/gemma_translator.py +202 -0
@@ -0,0 +1,60 @@
1
+ Metadata-Version: 2.4
2
+ Name: hamtaa-texttools
3
+ Version: 0.1.43
4
+ Summary: A set of high-level NLP tools
5
+ Author: Tohidi, Montazer, Givechi, Mousavinezhad
6
+ Requires-Python: >=3.8
7
+ Description-Content-Type: text/markdown
8
+ Requires-Dist: openai==1.97.1
9
+ Requires-Dist: numpy==1.26.4
10
+
11
+ # Text Tools
12
+
13
+ <p align="center">
14
+ <img src="https://img.shields.io/badge/TextTools-Python%20Text%20Processing-black?style=for-the-badge&logo=python&logoColor=white">
15
+ </p>
16
+
17
+
18
+ <p align="center">
19
+ <img src="docs/logo.png" alt="Preview" width="300" height="300">
20
+ </p>
21
+
22
+
23
+ ### How to Install
24
+
25
+ Install the package using:
26
+
27
+ ```bash
28
+ pip install -U hamtaa-texttools
29
+ ```
30
+
31
+
32
+ ---
33
+
34
+ ## What This Library Is *Not*
35
+
36
+ This is **not** a collection of low-level utilities.
37
+
38
+ To clarify: this library **does not** include things like:
39
+ - A standard `regex`
40
+ - Word normalization utilities
41
+
42
+ ---
43
+
44
+ ## What This Library *Provides*
45
+
46
+ This is a set of **high-level natural language processing (NLP)** tools.
47
+
48
+ Some of the features include:
49
+ - `question_detector`: Detecting if an incoming text is a question or not
50
+ - `categorizer`: Categorizing text with no fine-tuning needed
51
+ - ... (Tell me what you want!)
52
+
53
+ ---
54
+
55
+ ## When to Use This Library
56
+
57
+ Use `texttools` when:
58
+ - You need to **process large volumes of data using OpenAI’s GPT models** via the BATCH API.
59
+ - You want to treat an **LLM as a function** in Python that outputs structured JSON or Pydantic models.
60
+ - You need to **categorize large datasets** using vector embeddings, efficiently and at scale.
@@ -0,0 +1,50 @@
1
+ # Text Tools
2
+
3
+ <p align="center">
4
+ <img src="https://img.shields.io/badge/TextTools-Python%20Text%20Processing-black?style=for-the-badge&logo=python&logoColor=white">
5
+ </p>
6
+
7
+
8
+ <p align="center">
9
+ <img src="docs/logo.png" alt="Preview" width="300" height="300">
10
+ </p>
11
+
12
+
13
+ ### How to Install
14
+
15
+ Install the package using:
16
+
17
+ ```bash
18
+ pip install -U hamtaa-texttools
19
+ ```
20
+
21
+
22
+ ---
23
+
24
+ ## What This Library Is *Not*
25
+
26
+ This is **not** a collection of low-level utilities.
27
+
28
+ To clarify: this library **does not** include things like:
29
+ - A standard `regex`
30
+ - Word normalization utilities
31
+
32
+ ---
33
+
34
+ ## What This Library *Provides*
35
+
36
+ This is a set of **high-level natural language processing (NLP)** tools.
37
+
38
+ Some of the features include:
39
+ - `question_detector`: Detecting if an incoming text is a question or not
40
+ - `categorizer`: Categorizing text with no fine-tuning needed
41
+ - ... (Tell me what you want!)
42
+
43
+ ---
44
+
45
+ ## When to Use This Library
46
+
47
+ Use `texttools` when:
48
+ - You need to **process large volumes of data using OpenAI’s GPT models** via the BATCH API.
49
+ - You want to treat an **LLM as a function** in Python that outputs structured JSON or Pydantic models.
50
+ - You need to **categorize large datasets** using vector embeddings, efficiently and at scale.
@@ -0,0 +1,60 @@
1
+ Metadata-Version: 2.4
2
+ Name: hamtaa-texttools
3
+ Version: 0.1.43
4
+ Summary: A set of high-level NLP tools
5
+ Author: Tohidi, Montazer, Givechi, Mousavinezhad
6
+ Requires-Python: >=3.8
7
+ Description-Content-Type: text/markdown
8
+ Requires-Dist: openai==1.97.1
9
+ Requires-Dist: numpy==1.26.4
10
+
11
+ # Text Tools
12
+
13
+ <p align="center">
14
+ <img src="https://img.shields.io/badge/TextTools-Python%20Text%20Processing-black?style=for-the-badge&logo=python&logoColor=white">
15
+ </p>
16
+
17
+
18
+ <p align="center">
19
+ <img src="docs/logo.png" alt="Preview" width="300" height="300">
20
+ </p>
21
+
22
+
23
+ ### How to Install
24
+
25
+ Install the package using:
26
+
27
+ ```bash
28
+ pip install -U hamtaa-texttools
29
+ ```
30
+
31
+
32
+ ---
33
+
34
+ ## What This Library Is *Not*
35
+
36
+ This is **not** a collection of low-level utilities.
37
+
38
+ To clarify: this library **does not** include things like:
39
+ - A standard `regex`
40
+ - Word normalization utilities
41
+
42
+ ---
43
+
44
+ ## What This Library *Provides*
45
+
46
+ This is a set of **high-level natural language processing (NLP)** tools.
47
+
48
+ Some of the features include:
49
+ - `question_detector`: Detecting if an incoming text is a question or not
50
+ - `categorizer`: Categorizing text with no fine-tuning needed
51
+ - ... (Tell me what you want!)
52
+
53
+ ---
54
+
55
+ ## When to Use This Library
56
+
57
+ Use `texttools` when:
58
+ - You need to **process large volumes of data using OpenAI’s GPT models** via the BATCH API.
59
+ - You want to treat an **LLM as a function** in Python that outputs structured JSON or Pydantic models.
60
+ - You need to **categorize large datasets** using vector embeddings, efficiently and at scale.
@@ -0,0 +1,65 @@
1
+ README.md
2
+ pyproject.toml
3
+ setup.py
4
+ hamtaa_texttools.egg-info/PKG-INFO
5
+ hamtaa_texttools.egg-info/SOURCES.txt
6
+ hamtaa_texttools.egg-info/dependency_links.txt
7
+ hamtaa_texttools.egg-info/requires.txt
8
+ hamtaa_texttools.egg-info/top_level.txt
9
+ tests/test_vllm_output.py
10
+ texttools/__init__.py
11
+ texttools/base/__init__.py
12
+ texttools/base/base_categorizer.py
13
+ texttools/base/base_keyword_extractor.py
14
+ texttools/base/base_ner_extractor.py
15
+ texttools/base/base_question_detector.py
16
+ texttools/base/base_question_generator.py
17
+ texttools/base/base_question_merger.py
18
+ texttools/base/base_question_rewriter.py
19
+ texttools/base/base_router.py
20
+ texttools/base/base_summarizer.py
21
+ texttools/base/base_task_performer.py
22
+ texttools/base/base_translator.py
23
+ texttools/batch_manager/__init__.py
24
+ texttools/batch_manager/batch_manager.py
25
+ texttools/batch_manager/batch_runner.py
26
+ texttools/formatter/__init__.py
27
+ texttools/formatter/base.py
28
+ texttools/formatter/gemma3_formatter.py
29
+ texttools/handlers/__init__.py
30
+ texttools/handlers/handlers.py
31
+ texttools/handlers/categorizer/__init__.py
32
+ texttools/handlers/categorizer/categorizer.py
33
+ texttools/tools/__init__.py
34
+ texttools/tools/categorizer/__init__.py
35
+ texttools/tools/categorizer/encoder_model/__init__.py
36
+ texttools/tools/categorizer/encoder_model/encoder_vectorizer.py
37
+ texttools/tools/categorizer/llm/__init__.py
38
+ texttools/tools/categorizer/llm/gemma_categorizer.py
39
+ texttools/tools/categorizer/llm/openai_categorizer.py
40
+ texttools/tools/keyword_extractor/__init__.py
41
+ texttools/tools/keyword_extractor/gemma_extractor.py
42
+ texttools/tools/merger/__init__.py
43
+ texttools/tools/merger/gemma_question_merger.py
44
+ texttools/tools/ner/__init__.py
45
+ texttools/tools/ner/gemma_ner_extractor.py
46
+ texttools/tools/question_detector/__init__.py
47
+ texttools/tools/question_detector/gemma_detector.py
48
+ texttools/tools/question_detector/llm_detector.py
49
+ texttools/tools/question_generator/__init__.py
50
+ texttools/tools/question_generator/gemma_question_generator.py
51
+ texttools/tools/reranker/__init__.py
52
+ texttools/tools/reranker/reranker.py
53
+ texttools/tools/reranker/scorer.py
54
+ texttools/tools/reranker/sorter.py
55
+ texttools/tools/rewriter/__init__.py
56
+ texttools/tools/rewriter/gemma_question_rewriter.py
57
+ texttools/tools/router/__init__.py
58
+ texttools/tools/router/gemma_router.py
59
+ texttools/tools/subject_to_question/__init__.py
60
+ texttools/tools/subject_to_question/gemma_question_generator.py
61
+ texttools/tools/summarizer/__init__.py
62
+ texttools/tools/summarizer/gemma_summarizer.py
63
+ texttools/tools/summarizer/llm_summerizer.py
64
+ texttools/tools/translator/__init__.py
65
+ texttools/tools/translator/gemma_translator.py
@@ -0,0 +1,2 @@
1
+ openai==1.97.1
2
+ numpy==1.26.4
@@ -0,0 +1,24 @@
1
+ [build-system]
2
+ requires = ["setuptools>=61.0", "wheel"]
3
+ build-backend = "setuptools.build_meta"
4
+
5
+ [project]
6
+ name = "hamtaa-texttools"
7
+ version = "0.1.43"
8
+ description = "A set of high-level NLP tools"
9
+ readme = "README.md"
10
+ requires-python = ">=3.8"
11
+ authors = [
12
+ { name = "Tohidi" },
13
+ { name = "Montazer" },
14
+ { name = "Givechi" },
15
+ { name = "Mousavinezhad" }
16
+ ]
17
+ dependencies = [
18
+ "openai==1.97.1",
19
+ "numpy==1.26.4",
20
+ ]
21
+
22
+ [tool.setuptools.packages.find]
23
+ where = ["."]
24
+ include = ["texttools*"]
@@ -0,0 +1,4 @@
1
+ [egg_info]
2
+ tag_build =
3
+ tag_date = 0
4
+
@@ -0,0 +1,4 @@
1
+ # setup.py
2
+ from setuptools import setup
3
+
4
+ setup()
@@ -0,0 +1,79 @@
1
+ import json
2
+ from typing import Literal
3
+
4
+ from dotenv import load_dotenv
5
+ from openai import OpenAI
6
+ from pydantic import BaseModel
7
+
8
+ load_dotenv()
9
+
10
+ client = OpenAI()
11
+
12
+
13
# Structured-output schema for this script: run_parse() passes the class itself
# as `response_format`, and run_json_schema() passes Output.model_json_schema().
# NOTE(review): documented with `#` comments rather than a docstring on purpose --
# pydantic is understood to copy a class docstring into the generated JSON schema
# as its description, which would change the request payload; confirm before
# converting these comments to a docstring.
class Output(BaseModel):
    reason: str  # free-text field returned alongside the label
    tag: Literal["Positive", "Negative"]  # closed set of two allowed labels
16
+
17
+
18
+ messages = [
19
+ {
20
+ "role": "user",
21
+ "content": """
22
+ هدف ما طبقه بندی متن هست
23
+ متن رو بخون و ایده اصلی و آنالیزی کوتاه از اون رو ارائه بده
24
+
25
+ بسیار خلاصه باشه خروجی تو
26
+ نهایتا 20 کلمه
27
+
28
+ در نهایت یکی از تگ هارو انتخاب کن
29
+
30
+ متن:
31
+
32
+ امروز میخواهم به خونه برگردم!!
33
+ """,
34
+ }
35
+ ]
36
+
37
+
38
def run_parse():
    """Send the prompt through the SDK's ``parse`` helper.

    Uses the module-level ``client`` and ``messages``, targets the ``gemma-3``
    model, and passes the ``Output`` model directly as ``response_format``.

    :return: The response object returned by ``client.beta.chat.completions.parse``.
    """
    request = {
        "model": "gemma-3",
        "messages": messages,
        "response_format": Output,
        "extra_body": {"guided_decoding_backend": "auto"},
    }
    return client.beta.chat.completions.parse(**request)
45
+
46
+
47
def run_json_schema():
    """Send the prompt with an explicit ``json_schema`` response format.

    Uses the module-level ``client`` and ``messages``, targets the ``gemma-3``
    model, and describes the expected output with the JSON schema generated
    from the ``Output`` model.

    :return: The response object returned by ``client.chat.completions.create``.
    """
    schema_format = {
        "type": "json_schema",
        "json_schema": {
            "name": "output-schema",
            "schema": Output.model_json_schema(),
        },
    }
    return client.chat.completions.create(
        model="gemma-3",
        messages=messages,
        response_format=schema_format,
        extra_body={"guided_decoding_backend": "auto"},
    )
60
+
61
+
62
def main():
    """Run both structured-output request styles and print each result."""
    # Style 1: parse() -- print whatever the SDK exposes on message.parsed.
    parsed_output = run_parse().choices[0].message.parsed
    print(parsed_output)

    # Style 2: json_schema -- the message content is JSON text, so decode it
    # and pretty-print it without escaping non-ASCII characters.
    schema_response = run_json_schema()
    decoded = json.loads(schema_response.choices[0].message.content)
    print(json.dumps(decoded, ensure_ascii=False, indent=2))
76
+
77
+
78
+ if __name__ == "__main__":
79
+ main()
@@ -0,0 +1,26 @@
1
+ from texttools.batch_manager import BatchJobRunner, SimpleBatchManager
2
+ from texttools.handlers import (
3
+ NoOpResultHandler,
4
+ PrintResultHandler,
5
+ ResultHandler,
6
+ SaveToFileResultHandler,
7
+ )
8
+ from texttools.tools.categorizer.encoder_model.encoder_vectorizer import (
9
+ EmbeddingCategorizer,
10
+ )
11
+ from texttools.tools.categorizer.llm.openai_categorizer import LLMCategorizer
12
+ from texttools.tools.question_detector.llm_detector import LLMQuestionDetector
13
+ from texttools.tools.summarizer import LLMSummarizer
14
+
15
+ __all__ = [
16
+ "LLMQuestionDetector",
17
+ "NoOpResultHandler",
18
+ "PrintResultHandler",
19
+ "ResultHandler",
20
+ "SaveToFileResultHandler",
21
+ "EmbeddingCategorizer",
22
+ "LLMCategorizer",
23
+ "SimpleBatchManager",
24
+ "BatchJobRunner",
25
+ "LLMSummarizer",
26
+ ]
@@ -0,0 +1,3 @@
1
+ from texttools.base.base_categorizer import BaseCategorizer
2
+ from texttools.base.base_question_detector import BaseQuestionDetector
3
+ from texttools.base.base_summarizer import BaseSummarizer
@@ -0,0 +1,40 @@
1
+ import logging
2
+ from abc import ABC, abstractmethod
3
+ from enum import Enum
4
+ from typing import Optional
5
+
6
+ from texttools.handlers import NoOpResultHandler, ResultHandler
7
+
8
+
9
class BaseCategorizer(ABC):
    """Abstract base for text categorizers that report results to handlers."""

    def __init__(
        self,
        handlers: Optional[list[ResultHandler]] = None,
    ):
        """
        handlers: ResultHandler objects that receive every dispatched result.
        When none are given (or the list is empty), a single NoOpResultHandler is used.
        """
        if handlers:
            self.handlers = handlers
        else:
            self.handlers = [NoOpResultHandler()]

    @abstractmethod
    def categorize(self, text: str) -> Enum:
        """
        Categorize the input text and return the matching Enum member.
        Concrete subclasses must provide the implementation.
        """

    def preprocess(self, text: str) -> str:
        """
        Optional hook run before categorization; the base version returns the text unchanged.
        """
        return text

    def _dispatch(self, results: dict) -> None:
        """
        Pass results to every handler; a handler that raises is logged and skipped.
        """
        for handler in self.handlers:
            try:
                handler.handle(results)
            except Exception:
                # logging.exception logs at ERROR level with the traceback attached.
                logging.exception(f"Handler {handler.__class__.__name__} failed")
@@ -0,0 +1,35 @@
1
+ from abc import ABC, abstractmethod
2
+ from typing import Any, Optional
3
+
4
+
5
class BaseKeywordExtractor(ABC):
    """
    Base class for all extractors that output a list of keywords.
    """

    def __init__(
        self,
        handlers: Optional[list[Any]] = None,
    ):
        """
        :param handlers: Optional list of objects exposing ``handle(result)``;
            each one receives every dispatched result. Defaults to no handlers.
        """
        self.handlers = handlers or []

    @abstractmethod
    def extract_keywords(self, text: str) -> list[str]:
        """
        Extract keywords from the input text.
        Should return a list of strings, where each string is a keyword.
        """
        pass

    def preprocess(self, text: str) -> str:
        """
        Optional text preprocessing step; strips leading and trailing whitespace.
        """
        return text.strip()

    def _dispatch(self, result: dict) -> None:
        """
        Dispatch the result to handlers.

        A handler that raises is logged and skipped so the remaining handlers
        still receive the result, matching the other base classes in this package.
        """
        # Local import: this module does not import logging at file level.
        import logging

        for handler in self.handlers:
            try:
                handler.handle(result)
            except Exception:
                logging.error(
                    f"Handler {handler.__class__.__name__} failed", exc_info=True
                )
@@ -0,0 +1,61 @@
1
+ import logging
2
+ from abc import ABC, abstractmethod
3
+ from typing import Any, Optional
4
+
5
+
6
class BaseNERExtractor(ABC):
    """
    Abstract base for Named Entity Recognition (NER) extractors.
    """

    def __init__(self, handlers: Optional[list[Any]] = None):
        """
        Store the result handlers used by this extractor.

        :param handlers: Optional list of handlers that receive the NER results;
            an empty list is used when none are supplied.
        """
        self.handlers = handlers if handlers else []

    @abstractmethod
    def extract_entities(self, text: str) -> list[dict[str, str]]:
        """
        Extract named entities from the given text.

        :param text: The text to analyse.
        :return: One dictionary per entity, typically carrying 'text' and 'type'
                 keys (e.g., [{"text": "John Doe", "type": "PERSON"}, ...]).
        """

    def preprocess(self, text: str) -> str:
        """
        Optional hook applied to the raw text before extraction.

        :param text: Raw input text.
        :return: The text with leading and trailing whitespace removed.
        """
        return text.strip()

    def _dispatch(
        self, entities: list[dict[str, str]], original_text: Optional[str] = None
    ) -> None:
        """
        Forward the extracted entities to every registered handler.

        :param entities: The list of extracted entities.
        :param original_text: Included in the payload only when it is not None.
        """
        if original_text is None:
            payload = {"entities": entities}
        else:
            payload = {"entities": entities, "original_text": original_text}

        for handler in self.handlers:
            try:
                handler.handle(payload)
            except Exception:
                # A failing handler is logged with its traceback and skipped.
                logging.exception(f"Handler {handler.__class__.__name__} failed")
@@ -0,0 +1,35 @@
1
+ from abc import ABC, abstractmethod
2
+ from typing import Any, Optional
3
+
4
+
5
class BaseQuestionDetector(ABC):
    """
    Base class for all detectors that output a boolean (True/False).
    """

    def __init__(
        self,
        handlers: Optional[list[Any]] = None,
    ):
        """
        :param handlers: Optional list of objects exposing ``handle(result)``;
            each one receives every dispatched result. Defaults to no handlers.
        """
        self.handlers = handlers or []

    @abstractmethod
    def detect(self, text: str) -> bool:
        """
        Detect if the input text meets the condition.
        Should return True or False.
        """
        pass

    def preprocess(self, text: str) -> str:
        """
        Optional text preprocessing step; strips leading and trailing whitespace.
        """
        return text.strip()

    def _dispatch(self, result: dict) -> None:
        """
        Dispatch the result to handlers.

        A handler that raises is logged and skipped so the remaining handlers
        still receive the result, matching the other base classes in this package.
        """
        # Local import: this module does not import logging at file level.
        import logging

        for handler in self.handlers:
            try:
                handler.handle(result)
            except Exception:
                logging.error(
                    f"Handler {handler.__class__.__name__} failed", exc_info=True
                )
@@ -0,0 +1,99 @@
1
+ import logging
2
+ from abc import ABC, abstractmethod
3
+ from typing import Any, Optional
4
+
5
+
6
class BaseQuestionGenerator(ABC):
    """
    Abstract base for systems that produce a question from a given answer.
    """

    def __init__(self, handlers: Optional[list[Any]] = None):
        """
        Store the result handlers used by this generator.

        :param handlers: Optional list of handlers that receive the generation
            results; an empty list is used when none are supplied.
        """
        self.handlers = handlers if handlers else []

    @abstractmethod
    def generate_question(self, answer: str) -> str:
        """
        Produce a question that fits the provided answer.

        :param answer: The answer string to build a question for.
        :return: The generated question string.
        """

    def preprocess(self, text: str) -> str:
        """
        Optional hook applied to the raw answer text before generation.

        :param text: Raw input answer text.
        :return: The text with leading and trailing whitespace removed.
        """
        return text.strip()

    def _dispatch(self, result_data: dict) -> None:
        """
        Forward the result dictionary to every registered handler.

        :param result_data: A dictionary containing the results (e.g., {"original_answer": ..., "generated_question": ...}).
        """
        for handler in self.handlers:
            try:
                handler.handle(result_data)
            except Exception:
                # A failing handler is logged with its traceback and skipped.
                logging.exception(f"Handler {handler.__class__.__name__} failed")
51
+
52
+
53
class BaseQuestionGeneratorFromSubject(ABC):
    """
    Base class for all systems that generate questions from a given subject.

    NOTE(review): the original description says implementations curate "some
    number of questions", yet ``generate_question`` is annotated to return a
    single ``str`` -- confirm the intended return shape with implementers.
    """

    def __init__(self, handlers: Optional[list[Any]] = None):
        """
        Initializes the BaseQuestionGeneratorFromSubject with optional result handlers.

        :param handlers: Optional list of handlers to process the generation results.
        """
        self.handlers = handlers or []

    @abstractmethod
    def generate_question(self, subject: str) -> str:
        """
        Generates question text for the provided subject.

        :param subject: The subject string for which a question needs to be generated.
        :return: The generated question string.
        """
        pass

    def preprocess(self, text: str) -> str:
        """
        Optional: Preprocess the input subject text before question generation.

        :param text: Raw input subject text.
        :return: The text with leading and trailing whitespace removed.
        """
        return text.strip()

    def _dispatch(self, result_data: dict) -> None:
        """
        Sends the generation results to any registered result handlers.

        A handler that raises is logged with its traceback and skipped, so the
        remaining handlers still receive the result.

        :param result_data: A dictionary containing the results; its keys are
            chosen by the concrete subclass.
        """
        for handler in self.handlers:
            try:
                handler.handle(result_data)
            except Exception:
                logging.error(
                    f"Handler {handler.__class__.__name__} failed", exc_info=True
                )