PyPI - hamtaa-texttools - Versions diffs - 0.1.43__py3-none-any.whl - Mend

hamtaa-texttools 0.1.43__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of hamtaa-texttools might be problematic. Click here for more details.

Files changed (60) hide show

hamtaa_texttools-0.1.43.dist-info/METADATA +60 -0
hamtaa_texttools-0.1.43.dist-info/RECORD +60 -0
hamtaa_texttools-0.1.43.dist-info/WHEEL +5 -0
hamtaa_texttools-0.1.43.dist-info/top_level.txt +1 -0
texttools/__init__.py +26 -0
texttools/base/__init__.py +3 -0
texttools/base/base_categorizer.py +40 -0
texttools/base/base_keyword_extractor.py +35 -0
texttools/base/base_ner_extractor.py +61 -0
texttools/base/base_question_detector.py +35 -0
texttools/base/base_question_generator.py +99 -0
texttools/base/base_question_merger.py +59 -0
texttools/base/base_question_rewriter.py +61 -0
texttools/base/base_router.py +33 -0
texttools/base/base_summarizer.py +55 -0
texttools/base/base_task_performer.py +53 -0
texttools/base/base_translator.py +38 -0
texttools/batch_manager/__init__.py +2 -0
texttools/batch_manager/batch_manager.py +241 -0
texttools/batch_manager/batch_runner.py +207 -0
texttools/formatter/__init__.py +1 -0
texttools/formatter/base.py +26 -0
texttools/formatter/gemma3_formatter.py +51 -0
texttools/handlers/__init__.py +6 -0
texttools/handlers/categorizer/__init__.py +6 -0
texttools/handlers/categorizer/categorizer.py +61 -0
texttools/handlers/handlers.py +88 -0
texttools/tools/__init__.py +33 -0
texttools/tools/categorizer/__init__.py +2 -0
texttools/tools/categorizer/encoder_model/__init__.py +1 -0
texttools/tools/categorizer/encoder_model/encoder_vectorizer.py +51 -0
texttools/tools/categorizer/llm/__init__.py +2 -0
texttools/tools/categorizer/llm/gemma_categorizer.py +169 -0
texttools/tools/categorizer/llm/openai_categorizer.py +80 -0
texttools/tools/keyword_extractor/__init__.py +1 -0
texttools/tools/keyword_extractor/gemma_extractor.py +138 -0
texttools/tools/merger/__init__.py +2 -0
texttools/tools/merger/gemma_question_merger.py +214 -0
texttools/tools/ner/__init__.py +1 -0
texttools/tools/ner/gemma_ner_extractor.py +157 -0
texttools/tools/question_detector/__init__.py +2 -0
texttools/tools/question_detector/gemma_detector.py +130 -0
texttools/tools/question_detector/llm_detector.py +112 -0
texttools/tools/question_generator/__init__.py +1 -0
texttools/tools/question_generator/gemma_question_generator.py +198 -0
texttools/tools/reranker/__init__.py +3 -0
texttools/tools/reranker/reranker.py +137 -0
texttools/tools/reranker/scorer.py +216 -0
texttools/tools/reranker/sorter.py +278 -0
texttools/tools/rewriter/__init__.py +2 -0
texttools/tools/rewriter/gemma_question_rewriter.py +213 -0
texttools/tools/router/__init__.py +0 -0
texttools/tools/router/gemma_router.py +169 -0
texttools/tools/subject_to_question/__init__.py +1 -0
texttools/tools/subject_to_question/gemma_question_generator.py +224 -0
texttools/tools/summarizer/__init__.py +2 -0
texttools/tools/summarizer/gemma_summarizer.py +140 -0
texttools/tools/summarizer/llm_summerizer.py +108 -0
texttools/tools/translator/__init__.py +1 -0
texttools/tools/translator/gemma_translator.py +202 -0

hamtaa_texttools-0.1.43.dist-info/METADATA ADDED Viewed

@@ -0,0 +1,60 @@
+Metadata-Version: 2.4
+Name: hamtaa-texttools
+Version: 0.1.43
+Summary: A set of high-level NLP tools
+Author: Tohidi, Montazer, Givechi, Mousavinezhad
+Requires-Python: >=3.8
+Description-Content-Type: text/markdown
+Requires-Dist: openai==1.97.1
+Requires-Dist: numpy==1.26.4
+# Text Tools
+<p align="center">
+  <img src="https://img.shields.io/badge/TextTools-Python%20Text%20Processing-black?style=for-the-badge&logo=python&logoColor=white">
+</p>
+<p align="center">
+  <img src="docs/logo.png" alt="Preview" width="300" height="300">
+</p>
+### How to Install
+Install the package using:
+```bash
+pip install -U hamtaa-texttools
+```
+---
+## What This Library Is *Not*
+This is **not** a collection of low-level utilities.
+To clarify: this library **does not** include things like:
+- A standard `regex`
+- Word normalization utilities
+---
+## What This Library *Provides*
+This is a set of **high-level natural language processing (NLP)** tools.
+Some of the features include:
+- `question_detector`: Detecting if an incoming text is a question or not
+- `categorizer`: A categorizer that needs no fine-tuning
+- ... (Tell me what you want!)
+---
+## When to Use This Library
+Use `texttools` when:
+- You need to **process large volumes of data using OpenAI’s GPT models** via the BATCH API.
+- You want to treat an **LLM as a function** in Python that outputs structured JSON or Pydantic models.
+- You need to **categorize large datasets** using vector embeddings, efficiently and at scale.

hamtaa_texttools-0.1.43.dist-info/RECORD ADDED Viewed

@@ -0,0 +1,60 @@
+texttools/__init__.py,sha256=cI10Q_zaM9DPUCVOM79gZceuyt6Pjgpj3R-AG7xgUM8,778
+texttools/base/__init__.py,sha256=KUGm-Oe0BxlrRhPS-Jm2q1NCmwX8MdtZtloia7bcLaM,189
+texttools/base/base_categorizer.py,sha256=ojup94iXLxh92TjiJmrFXeRbsWKlon7PPAqez96B1bs,1130
+texttools/base/base_keyword_extractor.py,sha256=uKpxb3xI-sim-vXWe1R4_36QRhSNsWDR4IuVdpkZMME,868
+texttools/base/base_ner_extractor.py,sha256=D0LRNSyq1uIU9Qtepi7zpCWWzYz-AOxpVNjq97S1oUA,1933
+texttools/base/base_question_detector.py,sha256=FR9yDP0Z8aAfGafZy3kcpSDUUYWLJM7saRKdeVN5TiM,829
+texttools/base/base_question_generator.py,sha256=L_2ZwqyV9GxsKiQynWKRJG15OBFgQqiCic5H0i8R5yk,3238
+texttools/base/base_question_merger.py,sha256=TYhsihKaIdyGCVu4AcjxPZ1_HocHt__voV8WWGMRpMs,1945
+texttools/base/base_question_rewriter.py,sha256=K6ZnAjxi2qw4yLxm92zTI1IStCfX6c_6lCfIuBDSx8w,1973
+texttools/base/base_router.py,sha256=pFDjIXFqAhPiS9Onu5py_GxOq8geDGJDQh6k6IhCkvw,933
+texttools/base/base_summarizer.py,sha256=7NAilhUPs6ZUwkBpTtXAj6n2XxQH1w6SOolf3gQX2gc,1627
+texttools/base/base_task_performer.py,sha256=3-6qshkie50S7pRG4WHRNC_RdUbSmHOPKW56CD92-rM,1852
+texttools/base/base_translator.py,sha256=BoOxqaoPoUs8t1O3m2yL9pQa5iwisl097immTVcGZoE,1020
+texttools/batch_manager/__init__.py,sha256=3ZkxA395lRD4gNxJ1vp0fNuz_XuBr50GoP51rrwQ0Ks,87
+texttools/batch_manager/batch_manager.py,sha256=jAmKskL3OTYwwsO1mWsWAB3VxMlOF07c2GW1Ev83ZhY,9283
+texttools/batch_manager/batch_runner.py,sha256=kW0IPauI11xpssApMA7b4XI19FePImywym3V7tBaa-o,7404
+texttools/formatter/__init__.py,sha256=KHz2tFZctbit_HVbQNCTMi46JzmKlg-uB6Ost63IpVU,46
+texttools/formatter/base.py,sha256=0fiM6E7NdJevAVpL6yyPaUZVJGKWxE3fr-Ay1oqgJqQ,879
+texttools/formatter/gemma3_formatter.py,sha256=c7YRj6fIPqhs_nvnSbWRTuguRoNQJvuIvk_bcaVDioM,1634
+texttools/handlers/__init__.py,sha256=sv0JloipQ57AI0xo-3w9k6cK5rYjZP3ltR2EbBhkHTA,121
+texttools/handlers/handlers.py,sha256=LtC4FBuzRUDy3Jw-Fp21WR-QS1jOcDhsGaMPFQGjfTw,2381
+texttools/handlers/categorizer/__init__.py,sha256=mE05vt_ma6vcP8pQ37BZ85WVQ8jhcjDS0iZV81_LFCY,127
+texttools/handlers/categorizer/categorizer.py,sha256=HBpdhtCGUPl1TJUOxbgSLmVWD7o9xeIjmSWXvYzGrCA,1592
+texttools/tools/__init__.py,sha256=V3ZjSj_ZI9r02sOmxpxxxKBbBbtuYS1MQqtrdGZHC_A,1121
+texttools/tools/categorizer/__init__.py,sha256=VY0SVdik0et0fwLDj7qn-d5LtVqVBIalvlRVci699i4,48
+texttools/tools/categorizer/encoder_model/__init__.py,sha256=7UwoPlQ09VGN0cqfi5fPQRfsZZ8hoZj6fL6cax1BLSU,53
+texttools/tools/categorizer/encoder_model/encoder_vectorizer.py,sha256=MHPVJQJlvNhZ5xLVXk4FtvrORW2yxPSAnjEhjPbkQts,1476
+texttools/tools/categorizer/llm/__init__.py,sha256=0VbxvInITfNUlOF6bJqcUKKaYWlIe9K3vRmIRuvAGcY,95
+texttools/tools/categorizer/llm/gemma_categorizer.py,sha256=tjwKonTjT5cAhxWQaVyvyooRyOlGACHpnn72PNoLk-8,5636
+texttools/tools/categorizer/llm/openai_categorizer.py,sha256=omRk77Z5ZCIAz17h4wPDP_EcBSsscA-PQJpQjtI6--o,2547
+texttools/tools/keyword_extractor/__init__.py,sha256=eTpujS85MmRRbnNwc2ifKUh60W8OG4RQFmWki3Z7C_0,84
+texttools/tools/keyword_extractor/gemma_extractor.py,sha256=TJ4wMPWRuuzRi_Q0hr7UauKhEg8U_5U5j1D_lTFrn4s,4349
+texttools/tools/merger/__init__.py,sha256=bh2RBpqJvDaqEmDrM9y_GcjRqibagifAxiZVu8nEHc0,115
+texttools/tools/merger/gemma_question_merger.py,sha256=JAC-52kBbabIzEWp0MFi9viiu8nZOAMPaJZALHvNMqo,8035
+texttools/tools/ner/__init__.py,sha256=BW84BcItel6Mc2JlaDL6qvAktVMkti67VXceeCnOB1g,70
+texttools/tools/ner/gemma_ner_extractor.py,sha256=YhyIwX_8bdwkFb4gY8g9mZdYHW_r1jCvbmjjNCK9Wfo,5384
+texttools/tools/question_detector/__init__.py,sha256=ulArGttooSoxEe0vUDQSxUQrnsxr7gH9l-LjSER2dVI,162
+texttools/tools/question_detector/gemma_detector.py,sha256=dHWHcthjMArW42CNPGmk3Xbj1AxjM33A34dOmLUA64U,4141
+texttools/tools/question_detector/llm_detector.py,sha256=zo89eh359hqQGGf83-6M22AaiH7q-m0m91SjTyxZaYs,3862
+texttools/tools/question_generator/__init__.py,sha256=EAElpB_YeyMoBqvFNjbW2a_j18SLtiKQ7sRmdS58Fww,61
+texttools/tools/question_generator/gemma_question_generator.py,sha256=V5QcXmHZ5shTvrThOxUrKJ4FqP0P58NIJbsPdyyy5IM,6744
+texttools/tools/reranker/__init__.py,sha256=70jqJ9cjpPzzvnMYgHYGVZ9PrWrN9N97visqD_PVxwU,100
+texttools/tools/reranker/reranker.py,sha256=2SiTMIxempMuHui2n4GJV_2dLGBeoC7WAn_rVVXlMBA,5518
+texttools/tools/reranker/scorer.py,sha256=fQ3Ya8QmNhrcmb-Rf-72hvhweGvVj6gQ4KOlham2eE8,8176
+texttools/tools/reranker/sorter.py,sha256=_ed5zGz7K60skPFFuEQZ1ObBFA71LAfVT6FyWicA-Pw,11419
+texttools/tools/rewriter/__init__.py,sha256=U_qwGeEOqHAcV4p2CHVb0AIvHKFfdvykRzGyWD54aWA,121
+texttools/tools/rewriter/gemma_question_rewriter.py,sha256=jXtRswfBvHn9QmE90JvxEmLvCTbwZqZhD_A5ONWeCzo,7925
+texttools/tools/router/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+texttools/tools/router/gemma_router.py,sha256=VX-kHphZVZNd0_ajugN08hGkWNUeUriwfonpYy2TIS4,5619
+texttools/tools/subject_to_question/__init__.py,sha256=VJpns16Qe5OL_-4WuGDUNShcJsodB2khGWT3Q1Hc-WU,72
+texttools/tools/subject_to_question/gemma_question_generator.py,sha256=VKXHhYHEvhFLUR87iEh0eFpD_4ueX4np8IjF-NkgWrY,7417
+texttools/tools/summarizer/__init__.py,sha256=phrR7qO20CNhO3hjXQBzhTRVumdVdGSufmH4GEYkhj4,140
+texttools/tools/summarizer/gemma_summarizer.py,sha256=ikhsBv7AiZD1dT_d12AyjXxojzSW92e2y5WjchI_3bE,4474
+texttools/tools/summarizer/llm_summerizer.py,sha256=-0rUKbSnl1aDeBfJ5DCSbIlwd2k-9qIaCKgoQJa0hWc,3412
+texttools/tools/translator/__init__.py,sha256=KO1m08J2BZwRqBGO9ICB4l4cnH1jfHLHL5HbgYFUWM8,72
+texttools/tools/translator/gemma_translator.py,sha256=57NMfJAZHQjZSr_eCBePE_Pnag8pu3O00Jicxhzn6Jc,7572
+hamtaa_texttools-0.1.43.dist-info/METADATA,sha256=GjVLyZZclY4hp29Yd1DpRtqvFDmTAGOoYEOI-FFvbA0,1482
+hamtaa_texttools-0.1.43.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+hamtaa_texttools-0.1.43.dist-info/top_level.txt,sha256=5Mh0jIxxZ5rOXHGJ6Mp-JPKviywwN0MYuH0xk5bEWqE,10
+hamtaa_texttools-0.1.43.dist-info/RECORD,,

hamtaa_texttools-0.1.43.dist-info/WHEEL ADDED Viewed

@@ -0,0 +1,5 @@
+Wheel-Version: 1.0
+Generator: setuptools (80.9.0)
+Root-Is-Purelib: true
+Tag: py3-none-any

hamtaa_texttools-0.1.43.dist-info/top_level.txt ADDED Viewed

	@@ -0,0 +1 @@
1	+ texttools

texttools/__init__.py ADDED Viewed

@@ -0,0 +1,26 @@
+from texttools.batch_manager import BatchJobRunner, SimpleBatchManager
+from texttools.handlers import (
+    NoOpResultHandler,
+    PrintResultHandler,
+    ResultHandler,
+    SaveToFileResultHandler,
+)
+from texttools.tools.categorizer.encoder_model.encoder_vectorizer import (
+    EmbeddingCategorizer,
+)
+from texttools.tools.categorizer.llm.openai_categorizer import LLMCategorizer
+from texttools.tools.question_detector.llm_detector import LLMQuestionDetector
+from texttools.tools.summarizer import LLMSummarizer
+__all__ = [
+    "LLMQuestionDetector",
+    "NoOpResultHandler",
+    "PrintResultHandler",
+    "ResultHandler",
+    "SaveToFileResultHandler",
+    "EmbeddingCategorizer",
+    "LLMCategorizer",
+    "SimpleBatchManager",
+    "BatchJobRunner",
+    "LLMSummarizer",
+]

texttools/base/__init__.py ADDED Viewed

@@ -0,0 +1,3 @@
+from texttools.base.base_categorizer import BaseCategorizer
+from texttools.base.base_question_detector import BaseQuestionDetector
+from texttools.base.base_summarizer import BaseSummarizer

texttools/base/base_categorizer.py ADDED Viewed

@@ -0,0 +1,40 @@
+import logging
+from abc import ABC, abstractmethod
+from enum import Enum
+from typing import Optional
+from texttools.handlers import NoOpResultHandler, ResultHandler
+class BaseCategorizer(ABC):
+    def __init__(
+        self,
+        handlers: Optional[list[ResultHandler]] = None,
+    ):
+        """
+        handlers: List of ResultHandler objects that will process results after categorization.
+        """
+        self.handlers = handlers or [NoOpResultHandler()]
+    @abstractmethod
+    def categorize(self, text: str) -> Enum:
+        """
+        Categorize the input text.
+        Must return one of the Enum members defined in self.categories.
+        """
+        pass
+    def preprocess(self, text: str) -> str:
+        """
+        Optional: Preprocess text before categorization.
+        """
+        return text
+    def _dispatch(self, results: dict) -> None:
+        for handler in self.handlers:
+            try:
+                handler.handle(results)
+            except Exception:
+                logging.error(
+                    f"Handler {handler.__class__.__name__} failed", exc_info=True
+                )

texttools/base/base_keyword_extractor.py ADDED Viewed

@@ -0,0 +1,35 @@
+from abc import ABC, abstractmethod
+from typing import Any, Optional
+class BaseKeywordExtractor(ABC):
+    """
+    Base class for all detectors that output a list of keywords.
+    """
+    def __init__(
+        self,
+        handlers: Optional[list[Any]] = None,
+    ):
+        self.handlers = handlers or []
+    @abstractmethod
+    def extract_keywords(self, text: str) -> list[str]:
+        """
+        Extract keywords from the input text.
+        Should return a list of strings, where each string is a keyword.
+        """
+        pass
+    def preprocess(self, text: str) -> str:
+        """
+        Optional text preprocessing step.
+        """
+        return text.strip()
+    def _dispatch(self, result: dict) -> None:
+        """
+        Dispatch the result to handlers.
+        """
+        for handler in self.handlers:
+            handler.handle(result)

texttools/base/base_ner_extractor.py ADDED Viewed

@@ -0,0 +1,61 @@
+import logging
+from abc import ABC, abstractmethod
+from typing import Any, Optional
+class BaseNERExtractor(ABC):
+    """
+    Base class for all Named Entity Recognition (NER) systems.
+    """
+    def __init__(self, handlers: Optional[list[Any]] = None):
+        """
+        Initializes the BaseNERExtractor with optional result handlers.
+        :param handlers: Optional list of handlers to process the NER results.
+        """
+        self.handlers = handlers or []
+    @abstractmethod
+    def extract_entities(self, text: str) -> list[dict[str, str]]:
+        """
+        Extracts named entities from the input text.
+        :param text: The text from which to extract entities.
+        :return: A list of dictionaries, where each dictionary represents an entity
+                 and typically includes 'text' and 'type' keys (e.g.,
+                 [{"text": "John Doe", "type": "PERSON"}, ...]).
+        """
+        pass
+    def preprocess(self, text: str) -> str:
+        """
+        Optional: Preprocess the input text before entity extraction.
+        :param text: Raw input text.
+        :return: Preprocessed text.
+        """
+        return text.strip()
+    def _dispatch(
+        self, entities: list[dict[str, str]], original_text: Optional[str] = None
+    ) -> None:
+        """
+        Sends the extracted entities to any registered result handlers.
+        :param entities: The list of extracted entities.
+        :param original_text: Optionally pass the original text.
+        """
+        result_data = {
+            "entities": entities,
+        }
+        if original_text is not None:
+            result_data["original_text"] = original_text
+        for handler in self.handlers:
+            try:
+                handler.handle(result_data)
+            except Exception:
+                logging.error(
+                    f"Handler {handler.__class__.__name__} failed", exc_info=True
+                )

texttools/base/base_question_detector.py ADDED Viewed

@@ -0,0 +1,35 @@
+from abc import ABC, abstractmethod
+from typing import Any, Optional
+class BaseQuestionDetector(ABC):
+    """
+    Base class for all detectors that output a boolean (True/False).
+    """
+    def __init__(
+        self,
+        handlers: Optional[list[Any]] = None,
+    ):
+        self.handlers = handlers or []
+    @abstractmethod
+    def detect(self, text: str) -> bool:
+        """
+        Detect if the input text meets the condition.
+        Should return True or False.
+        """
+        pass
+    def preprocess(self, text: str) -> str:
+        """
+        Optional text preprocessing step.
+        """
+        return text.strip()
+    def _dispatch(self, result: dict) -> None:
+        """
+        Dispatch the result to handlers.
+        """
+        for handler in self.handlers:
+            handler.handle(result)

texttools/base/base_question_generator.py ADDED Viewed

@@ -0,0 +1,99 @@
+import logging
+from abc import ABC, abstractmethod
+from typing import Any, Optional
+class BaseQuestionGenerator(ABC):
+    """
+    Base class for all systems that generate a question from a given answer.
+    """
+    def __init__(self, handlers: Optional[list[Any]] = None):
+        """
+        Initializes the BaseQuestionGenerator with optional result handlers.
+        :param handlers: Optional list of handlers to process the generation results.
+        """
+        self.handlers = handlers or []
+    @abstractmethod
+    def generate_question(self, answer: str) -> str:
+        """
+        Generates an appropriate question for the provided answer.
+        :param answer: The answer string for which a question needs to be generated.
+        :return: The generated question string.
+        """
+        pass
+    def preprocess(self, text: str) -> str:
+        """
+        Optional: Preprocess the input answer text before question generation.
+        :param text: Raw input answer text.
+        :return: Preprocessed text.
+        """
+        return text.strip()
+    def _dispatch(self, result_data: dict) -> None:
+        """
+        Sends the generated question and original answer to any registered result handlers.
+        :param result_data: A dictionary containing the results (e.g., {"original_answer": ..., "generated_question": ...}).
+        """
+        for handler in self.handlers:
+            try:
+                handler.handle(result_data)
+            except Exception:
+                logging.error(
+                    f"Handler {handler.__class__.__name__} failed", exc_info=True
+                )
+class BaseQuestionGeneratorFromSubject(ABC):
+    """
+    Base class for all systems that generate a question from a given subject
+    it will curate some number of questions
+    """
+    def __init__(self, handlers: Optional[list[Any]] = None):
+        """
+        Initializes the BaseQuestionGeneratorFromSubject with optional result handlers.
+        :param handlers: Optional list of handlers to process the generation results.
+        """
+        self.handlers = handlers or []
+    @abstractmethod
+    def generate_question(self, subject: str) -> str:
+        """
+        Generates an appropriate question for the provided subject.
+        :param subject: The subject string for which a question needs to be generated.
+        :return: The generated question string.
+        """
+        pass
+    def preprocess(self, text: str) -> str:
+        """
+        Optional: Preprocess the input subject text before question generation.
+        :param text: Raw input subject text.
+        :return: Preprocessed text.
+        """
+        return text.strip()
+    def _dispatch(self, result_data: dict) -> None:
+        """
+        Sends the generated question and original answer to any registered result handlers.
+        :param result_data: A dictionary containing the results (e.g., {"original_answer": ..., "generated_question": ...}).
+        """
+        for handler in self.handlers:
+            try:
+                handler.handle(result_data)
+            except Exception:
+                logging.error(
+                    f"Handler {handler.__class__.__name__} failed", exc_info=True
+                )

texttools/base/base_question_merger.py ADDED Viewed

@@ -0,0 +1,59 @@
+import logging
+from abc import ABC, abstractmethod
+from enum import Enum
+from typing import Any, Optional
+class MergingMode(Enum):
+    """
+    Defines the two modes for question merging.
+    """
+    DEFAULT_MODE = "immediate merging"
+    REASON_MODE = "merging with reasoning"
+class BaseQuestionsMerger(ABC):
+    """
+    Base class for all systems that merge more than one question while preserving their content.
+    """
+    def __init__(self, handlers: Optional[list[Any]] = None):
+        """
+        Initializes the BaseQuestionsMerger with optional result handlers.
+        :param handlers: Optional list of handlers to process the merged results.
+        """
+        self.handlers = handlers or []
+    @abstractmethod
+    def merging_question(self, questions: list[str], mode: MergingMode) -> str:
+        """
+        Merges the input questions based on the specified mode.
+        :param questions: The original questions, as a list of strings.
+        :param mode: The MergingMode indicating how the questions should be merged.
+        :return: The rephrased and merged question string.
+        """
+        pass
+    def preprocess(self, text: str) -> str:
+        """
+        Optional: Preprocess the input questions' text before merging.
+        :param text: Raw input question's texts.
+        :return: Preprocessed text.
+        """
+        return text.strip()
+    def _dispatch(self, result_data: dict) -> None:
+        """
+        Sends the merged question and original questions to any registered result handlers.
+        :param result_data: A dictionary containing the results (e.g., {"original_question": ..., "rewritten_question": ..., "mode": ...}).
+        """
+        for handler in self.handlers:
+            try:
+                handler.handle(result_data)
+            except Exception:
+                logging.error(
+                    f"Handler {handler.__class__.__name__} failed", exc_info=True
+                )

texttools/base/base_question_rewriter.py ADDED Viewed

@@ -0,0 +1,61 @@
+import logging
+from abc import ABC, abstractmethod
+from enum import Enum
+from typing import Any, Optional
+class RewriteMode(Enum):
+    """
+    Defines the two modes for question rewriting.
+    """
+    SAME_MEANING_DIFFERENT_WORDING = "same_meaning_different_wording"
+    DIFFERENT_MEANING_SIMILAR_WORDING = "different_meaning_similar_wording"
+class BaseQuestionRewriter(ABC):
+    """
+    Base class for all systems that rewrite a question with different wording.
+    """
+    def __init__(self, handlers: Optional[list[Any]] = None):
+        """
+        Initializes the BaseQuestionRewriter with optional result handlers.
+        :param handlers: Optional list of handlers to process the rewriting results.
+        """
+        self.handlers = handlers or []
+    @abstractmethod
+    def rewrite_question(self, question: str, mode: RewriteMode) -> str:
+        """
+        Rewrites the input question based on the specified mode.
+        :param question: The original question string.
+        :param mode: The RewriteMode indicating how the question should be rewritten.
+        :return: The rephrased question string.
+        """
+        pass
+    def preprocess(self, text: str) -> str:
+        """
+        Optional: Preprocess the input question text before rewriting.
+        :param text: Raw input question text.
+        :return: Preprocessed text.
+        """
+        return text.strip()
+    def _dispatch(self, result_data: dict) -> None:
+        """
+        Sends the rewritten question and original question to any registered result handlers.
+        :param result_data: A dictionary containing the results (e.g., {"original_question": ..., "rewritten_question": ..., "mode": ...}).
+        """
+        for handler in self.handlers:
+            try:
+                handler.handle(result_data)
+            except Exception:
+                logging.error(
+                    f"Handler {handler.__class__.__name__} failed", exc_info=True
+                )

texttools/base/base_router.py ADDED Viewed

@@ -0,0 +1,33 @@
+from abc import ABC, abstractmethod
+from typing import Optional
+from texttools.handlers import NoOpResultHandler, ResultHandler
+class BaseRouter(ABC):
+    def __init__(self, handlers: Optional[list[ResultHandler]] = None):
+        """
+        Base class for routers
+        :param handlers: Optional list of handlers to process the routing result.
+        """
+        self.handlers = handlers or [NoOpResultHandler()]
+    @abstractmethod
+    def route(self, text: str) -> str:
+        """
+        Decides which of the available routes the input text belongs to.
+        :param text: The text to route.
+        :return: A route for the given text.
+        """
+        pass
+    def preprocess(self, text: str) -> str:
+        """
+        Optional: Preprocess the input text before routing.
+        :param text: Raw input text.
+        :return: Preprocessed text.
+        """
+        return text.strip()

texttools/base/base_summarizer.py ADDED Viewed

@@ -0,0 +1,55 @@
+import logging
+from abc import ABC, abstractmethod
+from typing import Optional
+from texttools.handlers import NoOpResultHandler, ResultHandler
+class BaseSummarizer(ABC):
+    def __init__(self, handlers: Optional[list[ResultHandler]] = None):
+        """
+        Base class for text summarization.
+        :param handlers: Optional list of handlers to process the summarization result.
+        """
+        self.handlers = handlers or [NoOpResultHandler()]
+    @abstractmethod
+    def summarize(self, text: str) -> str:
+        """
+        Generate a summary for the input text.
+        :param text: The text to summarize.
+        :return: A summary string.
+        """
+        pass
+    def preprocess(self, text: str) -> str:
+        """
+        Optional: Preprocess the input text before summarization.
+        :param text: Raw input text.
+        :return: Preprocessed text.
+        """
+        return text
+    def _dispatch(self, summary: str, original_text: Optional[str] = None) -> None:
+        """
+        Send the summary result to any registered result handlers.
+        :param summary: The generated summary.
+        :param original_text: Optionally pass the original text.
+        """
+        result_data = {
+            "summary": summary,
+        }
+        if original_text is not None:
+            result_data["original_text"] = original_text
+        for handler in self.handlers:
+            try:
+                handler.handle(result_data)
+            except Exception:
+                logging.error(
+                    f"Handler {handler.__class__.__name__} failed", exc_info=True
+                )