hamtaa-texttools 0.1.43__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of hamtaa-texttools might be problematic. Click here for more details.

Files changed (60) hide show
  1. hamtaa_texttools-0.1.43.dist-info/METADATA +60 -0
  2. hamtaa_texttools-0.1.43.dist-info/RECORD +60 -0
  3. hamtaa_texttools-0.1.43.dist-info/WHEEL +5 -0
  4. hamtaa_texttools-0.1.43.dist-info/top_level.txt +1 -0
  5. texttools/__init__.py +26 -0
  6. texttools/base/__init__.py +3 -0
  7. texttools/base/base_categorizer.py +40 -0
  8. texttools/base/base_keyword_extractor.py +35 -0
  9. texttools/base/base_ner_extractor.py +61 -0
  10. texttools/base/base_question_detector.py +35 -0
  11. texttools/base/base_question_generator.py +99 -0
  12. texttools/base/base_question_merger.py +59 -0
  13. texttools/base/base_question_rewriter.py +61 -0
  14. texttools/base/base_router.py +33 -0
  15. texttools/base/base_summarizer.py +55 -0
  16. texttools/base/base_task_performer.py +53 -0
  17. texttools/base/base_translator.py +38 -0
  18. texttools/batch_manager/__init__.py +2 -0
  19. texttools/batch_manager/batch_manager.py +241 -0
  20. texttools/batch_manager/batch_runner.py +207 -0
  21. texttools/formatter/__init__.py +1 -0
  22. texttools/formatter/base.py +26 -0
  23. texttools/formatter/gemma3_formatter.py +51 -0
  24. texttools/handlers/__init__.py +6 -0
  25. texttools/handlers/categorizer/__init__.py +6 -0
  26. texttools/handlers/categorizer/categorizer.py +61 -0
  27. texttools/handlers/handlers.py +88 -0
  28. texttools/tools/__init__.py +33 -0
  29. texttools/tools/categorizer/__init__.py +2 -0
  30. texttools/tools/categorizer/encoder_model/__init__.py +1 -0
  31. texttools/tools/categorizer/encoder_model/encoder_vectorizer.py +51 -0
  32. texttools/tools/categorizer/llm/__init__.py +2 -0
  33. texttools/tools/categorizer/llm/gemma_categorizer.py +169 -0
  34. texttools/tools/categorizer/llm/openai_categorizer.py +80 -0
  35. texttools/tools/keyword_extractor/__init__.py +1 -0
  36. texttools/tools/keyword_extractor/gemma_extractor.py +138 -0
  37. texttools/tools/merger/__init__.py +2 -0
  38. texttools/tools/merger/gemma_question_merger.py +214 -0
  39. texttools/tools/ner/__init__.py +1 -0
  40. texttools/tools/ner/gemma_ner_extractor.py +157 -0
  41. texttools/tools/question_detector/__init__.py +2 -0
  42. texttools/tools/question_detector/gemma_detector.py +130 -0
  43. texttools/tools/question_detector/llm_detector.py +112 -0
  44. texttools/tools/question_generator/__init__.py +1 -0
  45. texttools/tools/question_generator/gemma_question_generator.py +198 -0
  46. texttools/tools/reranker/__init__.py +3 -0
  47. texttools/tools/reranker/reranker.py +137 -0
  48. texttools/tools/reranker/scorer.py +216 -0
  49. texttools/tools/reranker/sorter.py +278 -0
  50. texttools/tools/rewriter/__init__.py +2 -0
  51. texttools/tools/rewriter/gemma_question_rewriter.py +213 -0
  52. texttools/tools/router/__init__.py +0 -0
  53. texttools/tools/router/gemma_router.py +169 -0
  54. texttools/tools/subject_to_question/__init__.py +1 -0
  55. texttools/tools/subject_to_question/gemma_question_generator.py +224 -0
  56. texttools/tools/summarizer/__init__.py +2 -0
  57. texttools/tools/summarizer/gemma_summarizer.py +140 -0
  58. texttools/tools/summarizer/llm_summerizer.py +108 -0
  59. texttools/tools/translator/__init__.py +1 -0
  60. texttools/tools/translator/gemma_translator.py +202 -0
@@ -0,0 +1,60 @@
1
+ Metadata-Version: 2.4
2
+ Name: hamtaa-texttools
3
+ Version: 0.1.43
4
+ Summary: A set of high-level NLP tools
5
+ Author: Tohidi, Montazer, Givechi, Mousavinezhad
6
+ Requires-Python: >=3.8
7
+ Description-Content-Type: text/markdown
8
+ Requires-Dist: openai==1.97.1
9
+ Requires-Dist: numpy==1.26.4
10
+
11
+ # Text Tools
12
+
13
+ <p align="center">
14
+ <img src="https://img.shields.io/badge/TextTools-Python%20Text%20Processing-black?style=for-the-badge&logo=python&logoColor=white">
15
+ </p>
16
+
17
+
18
+ <p align="center">
19
+ <img src="docs/logo.png" alt="Preview" width="300" height="300">
20
+ </p>
21
+
22
+
23
+ ### How to Install
24
+
25
+ Install the package using:
26
+
27
+ ```bash
28
+ pip install -U hamtaa-texttools
29
+ ```
30
+
31
+
32
+ ---
33
+
34
+ ## What This Library Is *Not*
35
+
36
+ This is **not** a collection of low-level utilities.
37
+
38
+ To clarify: this library **does not** include things like:
39
+ - A standard `regex`
40
+ - Word normalization utilities
41
+
42
+ ---
43
+
44
+ ## What This Library *Provides*
45
+
46
+ This is a set of **high-level natural language processing (NLP)** tools.
47
+
48
+ Some of the features include:
49
+ - `question_detector`: Detecting if an incoming text is a question or not
50
+ - `categorizer`: A categorizer that needs no fine-tuning
51
+ - ... (Tell me what you want!)
52
+
53
+ ---
54
+
55
+ ## When to Use This Library
56
+
57
+ Use `texttools` when:
58
+ - You need to **process large volumes of data using OpenAI’s GPT models** via the BATCH API.
59
+ - You want to treat an **LLM as a function** in Python that outputs structured JSON or Pydantic models.
60
+ - You need to **categorize large datasets** using vector embeddings, efficiently and at scale.
@@ -0,0 +1,60 @@
1
+ texttools/__init__.py,sha256=cI10Q_zaM9DPUCVOM79gZceuyt6Pjgpj3R-AG7xgUM8,778
2
+ texttools/base/__init__.py,sha256=KUGm-Oe0BxlrRhPS-Jm2q1NCmwX8MdtZtloia7bcLaM,189
3
+ texttools/base/base_categorizer.py,sha256=ojup94iXLxh92TjiJmrFXeRbsWKlon7PPAqez96B1bs,1130
4
+ texttools/base/base_keyword_extractor.py,sha256=uKpxb3xI-sim-vXWe1R4_36QRhSNsWDR4IuVdpkZMME,868
5
+ texttools/base/base_ner_extractor.py,sha256=D0LRNSyq1uIU9Qtepi7zpCWWzYz-AOxpVNjq97S1oUA,1933
6
+ texttools/base/base_question_detector.py,sha256=FR9yDP0Z8aAfGafZy3kcpSDUUYWLJM7saRKdeVN5TiM,829
7
+ texttools/base/base_question_generator.py,sha256=L_2ZwqyV9GxsKiQynWKRJG15OBFgQqiCic5H0i8R5yk,3238
8
+ texttools/base/base_question_merger.py,sha256=TYhsihKaIdyGCVu4AcjxPZ1_HocHt__voV8WWGMRpMs,1945
9
+ texttools/base/base_question_rewriter.py,sha256=K6ZnAjxi2qw4yLxm92zTI1IStCfX6c_6lCfIuBDSx8w,1973
10
+ texttools/base/base_router.py,sha256=pFDjIXFqAhPiS9Onu5py_GxOq8geDGJDQh6k6IhCkvw,933
11
+ texttools/base/base_summarizer.py,sha256=7NAilhUPs6ZUwkBpTtXAj6n2XxQH1w6SOolf3gQX2gc,1627
12
+ texttools/base/base_task_performer.py,sha256=3-6qshkie50S7pRG4WHRNC_RdUbSmHOPKW56CD92-rM,1852
13
+ texttools/base/base_translator.py,sha256=BoOxqaoPoUs8t1O3m2yL9pQa5iwisl097immTVcGZoE,1020
14
+ texttools/batch_manager/__init__.py,sha256=3ZkxA395lRD4gNxJ1vp0fNuz_XuBr50GoP51rrwQ0Ks,87
15
+ texttools/batch_manager/batch_manager.py,sha256=jAmKskL3OTYwwsO1mWsWAB3VxMlOF07c2GW1Ev83ZhY,9283
16
+ texttools/batch_manager/batch_runner.py,sha256=kW0IPauI11xpssApMA7b4XI19FePImywym3V7tBaa-o,7404
17
+ texttools/formatter/__init__.py,sha256=KHz2tFZctbit_HVbQNCTMi46JzmKlg-uB6Ost63IpVU,46
18
+ texttools/formatter/base.py,sha256=0fiM6E7NdJevAVpL6yyPaUZVJGKWxE3fr-Ay1oqgJqQ,879
19
+ texttools/formatter/gemma3_formatter.py,sha256=c7YRj6fIPqhs_nvnSbWRTuguRoNQJvuIvk_bcaVDioM,1634
20
+ texttools/handlers/__init__.py,sha256=sv0JloipQ57AI0xo-3w9k6cK5rYjZP3ltR2EbBhkHTA,121
21
+ texttools/handlers/handlers.py,sha256=LtC4FBuzRUDy3Jw-Fp21WR-QS1jOcDhsGaMPFQGjfTw,2381
22
+ texttools/handlers/categorizer/__init__.py,sha256=mE05vt_ma6vcP8pQ37BZ85WVQ8jhcjDS0iZV81_LFCY,127
23
+ texttools/handlers/categorizer/categorizer.py,sha256=HBpdhtCGUPl1TJUOxbgSLmVWD7o9xeIjmSWXvYzGrCA,1592
24
+ texttools/tools/__init__.py,sha256=V3ZjSj_ZI9r02sOmxpxxxKBbBbtuYS1MQqtrdGZHC_A,1121
25
+ texttools/tools/categorizer/__init__.py,sha256=VY0SVdik0et0fwLDj7qn-d5LtVqVBIalvlRVci699i4,48
26
+ texttools/tools/categorizer/encoder_model/__init__.py,sha256=7UwoPlQ09VGN0cqfi5fPQRfsZZ8hoZj6fL6cax1BLSU,53
27
+ texttools/tools/categorizer/encoder_model/encoder_vectorizer.py,sha256=MHPVJQJlvNhZ5xLVXk4FtvrORW2yxPSAnjEhjPbkQts,1476
28
+ texttools/tools/categorizer/llm/__init__.py,sha256=0VbxvInITfNUlOF6bJqcUKKaYWlIe9K3vRmIRuvAGcY,95
29
+ texttools/tools/categorizer/llm/gemma_categorizer.py,sha256=tjwKonTjT5cAhxWQaVyvyooRyOlGACHpnn72PNoLk-8,5636
30
+ texttools/tools/categorizer/llm/openai_categorizer.py,sha256=omRk77Z5ZCIAz17h4wPDP_EcBSsscA-PQJpQjtI6--o,2547
31
+ texttools/tools/keyword_extractor/__init__.py,sha256=eTpujS85MmRRbnNwc2ifKUh60W8OG4RQFmWki3Z7C_0,84
32
+ texttools/tools/keyword_extractor/gemma_extractor.py,sha256=TJ4wMPWRuuzRi_Q0hr7UauKhEg8U_5U5j1D_lTFrn4s,4349
33
+ texttools/tools/merger/__init__.py,sha256=bh2RBpqJvDaqEmDrM9y_GcjRqibagifAxiZVu8nEHc0,115
34
+ texttools/tools/merger/gemma_question_merger.py,sha256=JAC-52kBbabIzEWp0MFi9viiu8nZOAMPaJZALHvNMqo,8035
35
+ texttools/tools/ner/__init__.py,sha256=BW84BcItel6Mc2JlaDL6qvAktVMkti67VXceeCnOB1g,70
36
+ texttools/tools/ner/gemma_ner_extractor.py,sha256=YhyIwX_8bdwkFb4gY8g9mZdYHW_r1jCvbmjjNCK9Wfo,5384
37
+ texttools/tools/question_detector/__init__.py,sha256=ulArGttooSoxEe0vUDQSxUQrnsxr7gH9l-LjSER2dVI,162
38
+ texttools/tools/question_detector/gemma_detector.py,sha256=dHWHcthjMArW42CNPGmk3Xbj1AxjM33A34dOmLUA64U,4141
39
+ texttools/tools/question_detector/llm_detector.py,sha256=zo89eh359hqQGGf83-6M22AaiH7q-m0m91SjTyxZaYs,3862
40
+ texttools/tools/question_generator/__init__.py,sha256=EAElpB_YeyMoBqvFNjbW2a_j18SLtiKQ7sRmdS58Fww,61
41
+ texttools/tools/question_generator/gemma_question_generator.py,sha256=V5QcXmHZ5shTvrThOxUrKJ4FqP0P58NIJbsPdyyy5IM,6744
42
+ texttools/tools/reranker/__init__.py,sha256=70jqJ9cjpPzzvnMYgHYGVZ9PrWrN9N97visqD_PVxwU,100
43
+ texttools/tools/reranker/reranker.py,sha256=2SiTMIxempMuHui2n4GJV_2dLGBeoC7WAn_rVVXlMBA,5518
44
+ texttools/tools/reranker/scorer.py,sha256=fQ3Ya8QmNhrcmb-Rf-72hvhweGvVj6gQ4KOlham2eE8,8176
45
+ texttools/tools/reranker/sorter.py,sha256=_ed5zGz7K60skPFFuEQZ1ObBFA71LAfVT6FyWicA-Pw,11419
46
+ texttools/tools/rewriter/__init__.py,sha256=U_qwGeEOqHAcV4p2CHVb0AIvHKFfdvykRzGyWD54aWA,121
47
+ texttools/tools/rewriter/gemma_question_rewriter.py,sha256=jXtRswfBvHn9QmE90JvxEmLvCTbwZqZhD_A5ONWeCzo,7925
48
+ texttools/tools/router/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
49
+ texttools/tools/router/gemma_router.py,sha256=VX-kHphZVZNd0_ajugN08hGkWNUeUriwfonpYy2TIS4,5619
50
+ texttools/tools/subject_to_question/__init__.py,sha256=VJpns16Qe5OL_-4WuGDUNShcJsodB2khGWT3Q1Hc-WU,72
51
+ texttools/tools/subject_to_question/gemma_question_generator.py,sha256=VKXHhYHEvhFLUR87iEh0eFpD_4ueX4np8IjF-NkgWrY,7417
52
+ texttools/tools/summarizer/__init__.py,sha256=phrR7qO20CNhO3hjXQBzhTRVumdVdGSufmH4GEYkhj4,140
53
+ texttools/tools/summarizer/gemma_summarizer.py,sha256=ikhsBv7AiZD1dT_d12AyjXxojzSW92e2y5WjchI_3bE,4474
54
+ texttools/tools/summarizer/llm_summerizer.py,sha256=-0rUKbSnl1aDeBfJ5DCSbIlwd2k-9qIaCKgoQJa0hWc,3412
55
+ texttools/tools/translator/__init__.py,sha256=KO1m08J2BZwRqBGO9ICB4l4cnH1jfHLHL5HbgYFUWM8,72
56
+ texttools/tools/translator/gemma_translator.py,sha256=57NMfJAZHQjZSr_eCBePE_Pnag8pu3O00Jicxhzn6Jc,7572
57
+ hamtaa_texttools-0.1.43.dist-info/METADATA,sha256=GjVLyZZclY4hp29Yd1DpRtqvFDmTAGOoYEOI-FFvbA0,1482
58
+ hamtaa_texttools-0.1.43.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
59
+ hamtaa_texttools-0.1.43.dist-info/top_level.txt,sha256=5Mh0jIxxZ5rOXHGJ6Mp-JPKviywwN0MYuH0xk5bEWqE,10
60
+ hamtaa_texttools-0.1.43.dist-info/RECORD,,
@@ -0,0 +1,5 @@
1
+ Wheel-Version: 1.0
2
+ Generator: setuptools (80.9.0)
3
+ Root-Is-Purelib: true
4
+ Tag: py3-none-any
5
+
@@ -0,0 +1 @@
1
+ texttools
texttools/__init__.py ADDED
@@ -0,0 +1,26 @@
1
+ from texttools.batch_manager import BatchJobRunner, SimpleBatchManager
2
+ from texttools.handlers import (
3
+ NoOpResultHandler,
4
+ PrintResultHandler,
5
+ ResultHandler,
6
+ SaveToFileResultHandler,
7
+ )
8
+ from texttools.tools.categorizer.encoder_model.encoder_vectorizer import (
9
+ EmbeddingCategorizer,
10
+ )
11
+ from texttools.tools.categorizer.llm.openai_categorizer import LLMCategorizer
12
+ from texttools.tools.question_detector.llm_detector import LLMQuestionDetector
13
+ from texttools.tools.summarizer import LLMSummarizer
14
+
15
+ __all__ = [
16
+ "LLMQuestionDetector",
17
+ "NoOpResultHandler",
18
+ "PrintResultHandler",
19
+ "ResultHandler",
20
+ "SaveToFileResultHandler",
21
+ "EmbeddingCategorizer",
22
+ "LLMCategorizer",
23
+ "SimpleBatchManager",
24
+ "BatchJobRunner",
25
+ "LLMSummarizer",
26
+ ]
@@ -0,0 +1,3 @@
1
+ from texttools.base.base_categorizer import BaseCategorizer
2
+ from texttools.base.base_question_detector import BaseQuestionDetector
3
+ from texttools.base.base_summarizer import BaseSummarizer
@@ -0,0 +1,40 @@
1
+ import logging
2
+ from abc import ABC, abstractmethod
3
+ from enum import Enum
4
+ from typing import Optional
5
+
6
+ from texttools.handlers import NoOpResultHandler, ResultHandler
7
+
8
+
9
class BaseCategorizer(ABC):
    """
    Abstract base class for categorizers that map a text to an Enum member.

    Subclasses implement `categorize`; results can be forwarded to the
    registered handlers through `_dispatch`.
    """

    def __init__(
        self,
        handlers: Optional[list[ResultHandler]] = None,
    ):
        """
        :param handlers: List of ResultHandler objects that will process
            results after categorization. When omitted (or empty), a single
            NoOpResultHandler is installed.
        """
        self.handlers = handlers or [NoOpResultHandler()]

    @abstractmethod
    def categorize(self, text: str) -> Enum:
        """
        Categorize the input text.

        Must return one of the Enum members defined in self.categories.
        NOTE(review): this base class never sets `self.categories`; concrete
        subclasses are presumably expected to provide it.

        :param text: The text to categorize.
        :return: The Enum member chosen for the text.
        """
        pass

    def preprocess(self, text: str) -> str:
        """
        Optional: Preprocess text before categorization.

        The default implementation returns the text unchanged.

        :param text: Raw input text.
        :return: Preprocessed text.
        """
        return text

    def _dispatch(self, results: dict) -> None:
        """
        Send the results to every registered handler.

        A handler that raises is logged and skipped so the remaining handlers
        still run.

        :param results: The result payload passed to each handler's `handle`.
        """
        # Use a module-named logger: the module-level logging.error() helper
        # implicitly calls basicConfig() on the root logger, which a library
        # should not do on behalf of the application.
        logger = logging.getLogger(__name__)
        for handler in self.handlers:
            try:
                handler.handle(results)
            except Exception:
                logger.exception("Handler %s failed", handler.__class__.__name__)
@@ -0,0 +1,35 @@
1
+ from abc import ABC, abstractmethod
2
+ from typing import Any, Optional
3
+
4
+
5
class BaseKeywordExtractor(ABC):
    """
    Base class for all extractors that output a list of keywords.
    """

    def __init__(
        self,
        handlers: Optional[list[Any]] = None,
    ):
        """
        :param handlers: Optional list of handlers (objects exposing a
            `handle` method) that receive results via `_dispatch`.
            Defaults to an empty list.
        """
        self.handlers = handlers or []

    @abstractmethod
    def extract_keywords(self, text: str) -> list[str]:
        """
        Extract keywords from the input text.

        :param text: The text to extract keywords from.
        :return: A list of strings, where each string is a keyword.
        """
        pass

    def preprocess(self, text: str) -> str:
        """
        Optional text preprocessing step.

        The default implementation strips leading and trailing whitespace.

        :param text: Raw input text.
        :return: Preprocessed text.
        """
        return text.strip()

    def _dispatch(self, result: dict) -> None:
        """
        Dispatch the result to handlers.

        A handler that raises is logged and skipped so that one faulty
        handler cannot prevent the remaining handlers from running.

        :param result: The result payload passed to each handler's `handle`.
        """
        # Local import: this module does not import logging at file level.
        import logging

        logger = logging.getLogger(__name__)
        for handler in self.handlers:
            try:
                handler.handle(result)
            except Exception:
                logger.exception("Handler %s failed", handler.__class__.__name__)
@@ -0,0 +1,61 @@
1
+ import logging
2
+ from abc import ABC, abstractmethod
3
+ from typing import Any, Optional
4
+
5
+
6
class BaseNERExtractor(ABC):
    """
    Base class for all Named Entity Recognition (NER) systems.
    """

    def __init__(self, handlers: Optional[list[Any]] = None):
        """
        Initializes the BaseNERExtractor with optional result handlers.

        :param handlers: Optional list of handlers to process the NER results.
            Defaults to an empty list.
        """
        self.handlers = handlers or []

    @abstractmethod
    def extract_entities(self, text: str) -> list[dict[str, str]]:
        """
        Extracts named entities from the input text.

        :param text: The text from which to extract entities.
        :return: A list of dictionaries, where each dictionary represents an entity
                 and typically includes 'text' and 'type' keys (e.g.,
                 [{"text": "John Doe", "type": "PERSON"}, ...]).
        """
        pass

    def preprocess(self, text: str) -> str:
        """
        Optional: Preprocess the input text before entity extraction.

        The default implementation strips leading and trailing whitespace.

        :param text: Raw input text.
        :return: Preprocessed text.
        """
        return text.strip()

    def _dispatch(
        self, entities: list[dict[str, str]], original_text: Optional[str] = None
    ) -> None:
        """
        Sends the extracted entities to any registered result handlers.

        Each handler receives a dict with an "entities" key and, when
        `original_text` is given, an "original_text" key. A handler that
        raises is logged and skipped so the remaining handlers still run.

        :param entities: The list of extracted entities.
        :param original_text: Optionally pass the original text.
        """
        result_data: dict[str, Any] = {"entities": entities}
        if original_text is not None:
            result_data["original_text"] = original_text

        # Use a module-named logger: the module-level logging.error() helper
        # implicitly calls basicConfig() on the root logger, which a library
        # should not do on behalf of the application.
        logger = logging.getLogger(__name__)
        for handler in self.handlers:
            try:
                handler.handle(result_data)
            except Exception:
                logger.exception("Handler %s failed", handler.__class__.__name__)
@@ -0,0 +1,35 @@
1
+ from abc import ABC, abstractmethod
2
+ from typing import Any, Optional
3
+
4
+
5
class BaseQuestionDetector(ABC):
    """
    Base class for all detectors that output a boolean (True/False).
    """

    def __init__(
        self,
        handlers: Optional[list[Any]] = None,
    ):
        """
        :param handlers: Optional list of handlers (objects exposing a
            `handle` method) that receive results via `_dispatch`.
            Defaults to an empty list.
        """
        self.handlers = handlers or []

    @abstractmethod
    def detect(self, text: str) -> bool:
        """
        Detect if the input text meets the condition.

        :param text: The text to inspect.
        :return: True or False.
        """
        pass

    def preprocess(self, text: str) -> str:
        """
        Optional text preprocessing step.

        The default implementation strips leading and trailing whitespace.

        :param text: Raw input text.
        :return: Preprocessed text.
        """
        return text.strip()

    def _dispatch(self, result: dict) -> None:
        """
        Dispatch the result to handlers.

        A handler that raises is logged and skipped so that one faulty
        handler cannot prevent the remaining handlers from running.

        :param result: The result payload passed to each handler's `handle`.
        """
        # Local import: this module does not import logging at file level.
        import logging

        logger = logging.getLogger(__name__)
        for handler in self.handlers:
            try:
                handler.handle(result)
            except Exception:
                logger.exception("Handler %s failed", handler.__class__.__name__)
@@ -0,0 +1,99 @@
1
+ import logging
2
+ from abc import ABC, abstractmethod
3
+ from typing import Any, Optional
4
+
5
+
6
class BaseQuestionGenerator(ABC):
    """
    Base class for all systems that generate a question from a given answer.
    """

    def __init__(self, handlers: Optional[list[Any]] = None):
        """
        Initializes the BaseQuestionGenerator with optional result handlers.

        :param handlers: Optional list of handlers to process the generation results.
            Defaults to an empty list.
        """
        self.handlers = handlers or []

    @abstractmethod
    def generate_question(self, answer: str) -> str:
        """
        Generates an appropriate question for the provided answer.

        :param answer: The answer string for which a question needs to be generated.
        :return: The generated question string.
        """
        pass

    def preprocess(self, text: str) -> str:
        """
        Optional: Preprocess the input answer text before question generation.

        The default implementation strips leading and trailing whitespace.

        :param text: Raw input answer text.
        :return: Preprocessed text.
        """
        return text.strip()

    def _dispatch(self, result_data: dict) -> None:
        """
        Sends the generated question and original answer to any registered result handlers.

        A handler that raises is logged and skipped so the remaining handlers
        still run.

        :param result_data: A dictionary containing the results (e.g., {"original_answer": ..., "generated_question": ...}).
        """
        # Use a module-named logger: the module-level logging.error() helper
        # implicitly calls basicConfig() on the root logger, which a library
        # should not do on behalf of the application.
        logger = logging.getLogger(__name__)
        for handler in self.handlers:
            try:
                handler.handle(result_data)
            except Exception:
                logger.exception("Handler %s failed", handler.__class__.__name__)
51
+
52
+
53
class BaseQuestionGeneratorFromSubject(ABC):
    """
    Base class for all systems that generate questions from a given subject.

    NOTE(review): the original description says implementations "curate some
    number of questions", while `generate_question` is annotated to return a
    single string; confirm the intended return shape against the subclasses.
    """

    def __init__(self, handlers: Optional[list[Any]] = None):
        """
        Initializes the BaseQuestionGeneratorFromSubject with optional result handlers.

        :param handlers: Optional list of handlers to process the generation results.
            Defaults to an empty list.
        """
        self.handlers = handlers or []

    @abstractmethod
    def generate_question(self, subject: str) -> str:
        """
        Generates an appropriate question for the provided subject.

        :param subject: The subject string for which a question needs to be generated.
        :return: The generated question string.
        """
        pass

    def preprocess(self, text: str) -> str:
        """
        Optional: Preprocess the input subject text before question generation.

        The default implementation strips leading and trailing whitespace.

        :param text: Raw input subject text.
        :return: Preprocessed text.
        """
        return text.strip()

    def _dispatch(self, result_data: dict) -> None:
        """
        Sends the generation results to any registered result handlers.

        A handler that raises is logged and skipped so the remaining handlers
        still run.

        :param result_data: A dictionary containing the results; its keys are
            chosen by the concrete subclass.
        """
        # Use a module-named logger: the module-level logging.error() helper
        # implicitly calls basicConfig() on the root logger, which a library
        # should not do on behalf of the application.
        logger = logging.getLogger(__name__)
        for handler in self.handlers:
            try:
                handler.handle(result_data)
            except Exception:
                logger.exception("Handler %s failed", handler.__class__.__name__)
@@ -0,0 +1,59 @@
1
+ import logging
2
+ from abc import ABC, abstractmethod
3
+ from enum import Enum
4
+ from typing import Any, Optional
5
+
6
+
7
class MergingMode(Enum):
    """Enumerates the strategies available when merging questions."""

    # Merge directly (value: "immediate merging").
    DEFAULT_MODE = "immediate merging"

    # Merge with a reasoning step (value: "merging with reasoning").
    REASON_MODE = "merging with reasoning"
14
+
15
+
16
class BaseQuestionsMerger(ABC):
    """
    Base class for all systems that merge more than one question while preserving their contents.
    """

    def __init__(self, handlers: Optional[list[Any]] = None):
        """
        Initializes the BaseQuestionsMerger with optional result handlers.

        :param handlers: Optional list of handlers to process the merged results.
            Defaults to an empty list.
        """
        self.handlers = handlers or []

    @abstractmethod
    def merging_question(self, questions: list[str], mode: MergingMode) -> str:
        """
        Merges the input questions based on the specified mode.

        :param questions: The original questions, as a list of strings.
        :param mode: The MergingMode indicating how the questions should be merged.
        :return: The rephrased and merged question string.
        """
        pass

    def preprocess(self, text: str) -> str:
        """
        Optional: Preprocess a question's text before merging.

        The default implementation strips leading and trailing whitespace.

        :param text: Raw input question text.
        :return: Preprocessed text.
        """
        return text.strip()

    def _dispatch(self, result_data: dict) -> None:
        """
        Sends the merged question and original questions to any registered result handlers.

        A handler that raises is logged and skipped so the remaining handlers
        still run.

        :param result_data: A dictionary containing the results; its keys are
            chosen by the concrete subclass.
        """
        # Use a module-named logger: the module-level logging.error() helper
        # implicitly calls basicConfig() on the root logger, which a library
        # should not do on behalf of the application.
        logger = logging.getLogger(__name__)
        for handler in self.handlers:
            try:
                handler.handle(result_data)
            except Exception:
                logger.exception("Handler %s failed", handler.__class__.__name__)
@@ -0,0 +1,61 @@
1
+ import logging
2
+ from abc import ABC, abstractmethod
3
+ from enum import Enum
4
+ from typing import Any, Optional
5
+
6
+
7
class RewriteMode(Enum):
    """Enumerates the strategies available when rewriting a question."""

    # Keep the meaning, change the wording.
    SAME_MEANING_DIFFERENT_WORDING = "same_meaning_different_wording"

    # Change the meaning, keep the wording similar.
    DIFFERENT_MEANING_SIMILAR_WORDING = "different_meaning_similar_wording"
14
+
15
+
16
class BaseQuestionRewriter(ABC):
    """
    Base class for all systems that rewrite a question with different wording.
    """

    def __init__(self, handlers: Optional[list[Any]] = None):
        """
        Initializes the BaseQuestionRewriter with optional result handlers.

        :param handlers: Optional list of handlers to process the rewriting results.
            Defaults to an empty list.
        """
        self.handlers = handlers or []

    @abstractmethod
    def rewrite_question(self, question: str, mode: RewriteMode) -> str:
        """
        Rewrites the input question based on the specified mode.

        :param question: The original question string.
        :param mode: The RewriteMode indicating how the question should be rewritten.
        :return: The rephrased question string.
        """
        pass

    def preprocess(self, text: str) -> str:
        """
        Optional: Preprocess the input question text before rewriting.

        The default implementation strips leading and trailing whitespace.

        :param text: Raw input question text.
        :return: Preprocessed text.
        """
        return text.strip()

    def _dispatch(self, result_data: dict) -> None:
        """
        Sends the rewritten question and original question to any registered result handlers.

        A handler that raises is logged and skipped so the remaining handlers
        still run.

        :param result_data: A dictionary containing the results (e.g., {"original_question": ..., "rewritten_question": ..., "mode": ...}).
        """
        # Use a module-named logger: the module-level logging.error() helper
        # implicitly calls basicConfig() on the root logger, which a library
        # should not do on behalf of the application.
        logger = logging.getLogger(__name__)
        for handler in self.handlers:
            try:
                handler.handle(result_data)
            except Exception:
                logger.exception("Handler %s failed", handler.__class__.__name__)
@@ -0,0 +1,33 @@
1
+ from abc import ABC, abstractmethod
2
+ from typing import Optional
3
+
4
+ from texttools.handlers import NoOpResultHandler, ResultHandler
5
+
6
+
7
class BaseRouter(ABC):
    """
    Base class for routers, which choose a route for an input text.
    """

    def __init__(self, handlers: Optional[list[ResultHandler]] = None):
        """
        Base class for routers

        :param handlers: Optional list of handlers to process the routing result.
            When omitted (or empty), a single NoOpResultHandler is installed.
        """
        self.handlers = handlers or [NoOpResultHandler()]

    @abstractmethod
    def route(self, text: str) -> str:
        """
        Decides and classifies the input text between the choices that it has.

        :param text: The text to route.
        :return: A route for the given text.
        """
        pass

    def preprocess(self, text: str) -> str:
        """
        Optional: Preprocess the input text before routing.

        The default implementation strips leading and trailing whitespace.

        :param text: Raw input text.
        :return: Preprocessed text.
        """
        return text.strip()

    def _dispatch(self, result_data: dict) -> None:
        """
        Sends the routing result to any registered result handlers.

        A handler that raises is logged and skipped so the remaining handlers
        still run. Subclasses that define their own `_dispatch` override this.

        :param result_data: A dictionary containing the results; its keys are
            chosen by the concrete subclass.
        """
        # Local import: this module does not import logging at file level.
        import logging

        logger = logging.getLogger(__name__)
        for handler in self.handlers:
            try:
                handler.handle(result_data)
            except Exception:
                logger.exception("Handler %s failed", handler.__class__.__name__)
@@ -0,0 +1,55 @@
1
+ import logging
2
+ from abc import ABC, abstractmethod
3
+ from typing import Optional
4
+
5
+ from texttools.handlers import NoOpResultHandler, ResultHandler
6
+
7
+
8
class BaseSummarizer(ABC):
    """
    Base class for text summarization.
    """

    def __init__(self, handlers: Optional[list[ResultHandler]] = None):
        """
        Base class for text summarization.

        :param handlers: Optional list of handlers to process the summarization result.
            When omitted (or empty), a single NoOpResultHandler is installed.
        """
        self.handlers = handlers or [NoOpResultHandler()]

    @abstractmethod
    def summarize(self, text: str) -> str:
        """
        Generate a summary for the input text.

        :param text: The text to summarize.
        :return: A summary string.
        """
        pass

    def preprocess(self, text: str) -> str:
        """
        Optional: Preprocess the input text before summarization.

        The default implementation returns the text unchanged.

        :param text: Raw input text.
        :return: Preprocessed text.
        """
        return text

    def _dispatch(self, summary: str, original_text: Optional[str] = None) -> None:
        """
        Send the summary result to any registered result handlers.

        Each handler receives a dict with a "summary" key and, when
        `original_text` is given, an "original_text" key. A handler that
        raises is logged and skipped so the remaining handlers still run.

        :param summary: The generated summary.
        :param original_text: Optionally pass the original text.
        """
        result_data = {"summary": summary}
        if original_text is not None:
            result_data["original_text"] = original_text

        # Use a module-named logger: the module-level logging.error() helper
        # implicitly calls basicConfig() on the root logger, which a library
        # should not do on behalf of the application.
        logger = logging.getLogger(__name__)
        for handler in self.handlers:
            try:
                handler.handle(result_data)
            except Exception:
                logger.exception("Handler %s failed", handler.__class__.__name__)