PyPI - hamtaa-texttools - Versions diffs - 0.1.44__py3-none-any.whl → 1.0.0__py3-none-any.whl - Mend

hamtaa-texttools 0.1.44__py3-none-any.whl → 1.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of hamtaa-texttools might be problematic. Click here for more details.

Files changed (70) hide show

hamtaa_texttools-1.0.0.dist-info/METADATA +129 -0
hamtaa_texttools-1.0.0.dist-info/RECORD +17 -0
hamtaa_texttools-1.0.0.dist-info/licenses/LICENSE +21 -0
{hamtaa_texttools-0.1.44.dist-info → hamtaa_texttools-1.0.0.dist-info}/top_level.txt +0 -0
texttools/__init__.py +4 -21
texttools/formatters/base_formatter.py +33 -0
texttools/formatters/user_merge_formatter/user_merge_formatter.py +47 -0
texttools/tools/__init__.py +2 -32
texttools/tools/operator.py +236 -0
texttools/tools/output_models.py +54 -0
texttools/tools/prompt_loader.py +84 -0
texttools/tools/the_tool.py +291 -0
texttools/utils/__init__.py +4 -0
texttools/{batch_manager → utils/batch_manager}/__init__.py +2 -0
texttools/{batch_manager → utils/batch_manager}/batch_manager.py +11 -12
texttools/{batch_manager → utils/batch_manager}/batch_runner.py +20 -15
hamtaa_texttools-0.1.44.dist-info/METADATA +0 -60
hamtaa_texttools-0.1.44.dist-info/RECORD +0 -60
texttools/base/__init__.py +0 -3
texttools/base/base_categorizer.py +0 -40
texttools/base/base_keyword_extractor.py +0 -35
texttools/base/base_ner_extractor.py +0 -61
texttools/base/base_question_detector.py +0 -35
texttools/base/base_question_generator.py +0 -99
texttools/base/base_question_merger.py +0 -59
texttools/base/base_question_rewriter.py +0 -61
texttools/base/base_router.py +0 -33
texttools/base/base_summarizer.py +0 -55
texttools/base/base_task_performer.py +0 -53
texttools/base/base_translator.py +0 -38
texttools/formatter/__init__.py +0 -1
texttools/formatter/base.py +0 -26
texttools/formatter/gemma3_formatter.py +0 -54
texttools/handlers/__init__.py +0 -6
texttools/handlers/categorizer/__init__.py +0 -6
texttools/handlers/categorizer/categorizer.py +0 -61
texttools/handlers/handlers.py +0 -88
texttools/tools/categorizer/__init__.py +0 -2
texttools/tools/categorizer/encoder_model/__init__.py +0 -1
texttools/tools/categorizer/encoder_model/encoder_vectorizer.py +0 -51
texttools/tools/categorizer/llm/__init__.py +0 -2
texttools/tools/categorizer/llm/gemma_categorizer.py +0 -169
texttools/tools/categorizer/llm/openai_categorizer.py +0 -80
texttools/tools/keyword_extractor/__init__.py +0 -1
texttools/tools/keyword_extractor/gemma_extractor.py +0 -138
texttools/tools/merger/__init__.py +0 -2
texttools/tools/merger/gemma_question_merger.py +0 -214
texttools/tools/ner/__init__.py +0 -1
texttools/tools/ner/gemma_ner_extractor.py +0 -157
texttools/tools/question_detector/__init__.py +0 -2
texttools/tools/question_detector/gemma_detector.py +0 -114
texttools/tools/question_detector/llm_detector.py +0 -112
texttools/tools/question_generator/__init__.py +0 -1
texttools/tools/question_generator/gemma_question_generator.py +0 -198
texttools/tools/reranker/__init__.py +0 -3
texttools/tools/reranker/reranker.py +0 -137
texttools/tools/reranker/scorer.py +0 -216
texttools/tools/reranker/sorter.py +0 -278
texttools/tools/rewriter/__init__.py +0 -2
texttools/tools/rewriter/gemma_question_rewriter.py +0 -213
texttools/tools/router/__init__.py +0 -0
texttools/tools/router/gemma_router.py +0 -169
texttools/tools/subject_to_question/__init__.py +0 -1
texttools/tools/subject_to_question/gemma_question_generator.py +0 -224
texttools/tools/summarizer/__init__.py +0 -2
texttools/tools/summarizer/gemma_summarizer.py +0 -140
texttools/tools/summarizer/llm_summerizer.py +0 -108
texttools/tools/translator/__init__.py +0 -1
texttools/tools/translator/gemma_translator.py +0 -202
{hamtaa_texttools-0.1.44.dist-info → hamtaa_texttools-1.0.0.dist-info}/WHEEL +0 -0

texttools/base/base_categorizer.py DELETED Viewed

@@ -1,40 +0,0 @@
-import logging
-from abc import ABC, abstractmethod
-from enum import Enum
-from typing import Optional
-from texttools.handlers import NoOpResultHandler, ResultHandler
-class BaseCategorizer(ABC):
-    def __init__(
-        self,
-        handlers: Optional[list[ResultHandler]] = None,
-    ):
-        """
-        handlers: List of ResultHandler objects that will process results after categorization.
-        """
-        self.handlers = handlers or [NoOpResultHandler()]
-    @abstractmethod
-    def categorize(self, text: str) -> Enum:
-        """
-        Categorize the input text.
-        Must return one of the Enum members defined in self.categories.
-        """
-        pass
-    def preprocess(self, text: str) -> str:
-        """
-        Optional: Preprocess text before categorization.
-        """
-        return text
-    def _dispatch(self, results: dict) -> None:
-        for handler in self.handlers:
-            try:
-                handler.handle(results)
-            except Exception:
-                logging.error(
-                    f"Handler {handler.__class__.__name__} failed", exc_info=True
-                )

texttools/base/base_keyword_extractor.py DELETED Viewed

@@ -1,35 +0,0 @@
-from abc import ABC, abstractmethod
-from typing import Any, Optional
-class BaseKeywordExtractor(ABC):
-    """
-    Base class for all detectors that output a list of keywords.
-    """
-    def __init__(
-        self,
-        handlers: Optional[list[Any]] = None,
-    ):
-        self.handlers = handlers or []
-    @abstractmethod
-    def extract_keywords(self, text: str) -> list[str]:
-        """
-        Extract keywords from the input text.
-        Should return a list of strings, where each string is a keyword.
-        """
-        pass
-    def preprocess(self, text: str) -> str:
-        """
-        Optional text preprocessing step.
-        """
-        return text.strip()
-    def _dispatch(self, result: dict) -> None:
-        """
-        Dispatch the result to handlers.
-        """
-        for handler in self.handlers:
-            handler.handle(result)

texttools/base/base_ner_extractor.py DELETED Viewed

@@ -1,61 +0,0 @@
-import logging
-from abc import ABC, abstractmethod
-from typing import Any, Optional
-class BaseNERExtractor(ABC):
-    """
-    Base class for all Named Entity Recognition (NER) systems.
-    """
-    def __init__(self, handlers: Optional[list[Any]] = None):
-        """
-        Initializes the BaseNERExtractor with optional result handlers.
-        :param handlers: Optional list of handlers to process the NER results.
-        """
-        self.handlers = handlers or []
-    @abstractmethod
-    def extract_entities(self, text: str) -> list[dict[str, str]]:
-        """
-        Extracts named entities from the input text.
-        :param text: The text from which to extract entities.
-        :return: A list of dictionaries, where each dictionary represents an entity
-                 and typically includes 'text' and 'type' keys (e.g.,
-                 [{"text": "John Doe", "type": "PERSON"}, ...]).
-        """
-        pass
-    def preprocess(self, text: str) -> str:
-        """
-        Optional: Preprocess the input text before entity extraction.
-        :param text: Raw input text.
-        :return: Preprocessed text.
-        """
-        return text.strip()
-    def _dispatch(
-        self, entities: list[dict[str, str]], original_text: Optional[str] = None
-    ) -> None:
-        """
-        Sends the extracted entities to any registered result handlers.
-        :param entities: The list of extracted entities.
-        :param original_text: Optionally pass the original text.
-        """
-        result_data = {
-            "entities": entities,
-        }
-        if original_text is not None:
-            result_data["original_text"] = original_text
-        for handler in self.handlers:
-            try:
-                handler.handle(result_data)
-            except Exception:
-                logging.error(
-                    f"Handler {handler.__class__.__name__} failed", exc_info=True
-                )

texttools/base/base_question_detector.py DELETED Viewed

@@ -1,35 +0,0 @@
-from abc import ABC, abstractmethod
-from typing import Any, Optional
-class BaseQuestionDetector(ABC):
-    """
-    Base class for all detectors that output a boolean (True/False).
-    """
-    def __init__(
-        self,
-        handlers: Optional[list[Any]] = None,
-    ):
-        self.handlers = handlers or []
-    @abstractmethod
-    def detect(self, text: str) -> bool:
-        """
-        Detect if the input text meets the condition.
-        Should return True or False.
-        """
-        pass
-    def preprocess(self, text: str) -> str:
-        """
-        Optional text preprocessing step.
-        """
-        return text.strip()
-    def _dispatch(self, result: dict) -> None:
-        """
-        Dispatch the result to handlers.
-        """
-        for handler in self.handlers:
-            handler.handle(result)

texttools/base/base_question_generator.py DELETED Viewed

@@ -1,99 +0,0 @@
-import logging
-from abc import ABC, abstractmethod
-from typing import Any, Optional
-class BaseQuestionGenerator(ABC):
-    """
-    Base class for all systems that generate a question from a given answer.
-    """
-    def __init__(self, handlers: Optional[list[Any]] = None):
-        """
-        Initializes the BaseQuestionGenerator with optional result handlers.
-        :param handlers: Optional list of handlers to process the generation results.
-        """
-        self.handlers = handlers or []
-    @abstractmethod
-    def generate_question(self, answer: str) -> str:
-        """
-        Generates an appropriate question for the provided answer.
-        :param answer: The answer string for which a question needs to be generated.
-        :return: The generated question string.
-        """
-        pass
-    def preprocess(self, text: str) -> str:
-        """
-        Optional: Preprocess the input answer text before question generation.
-        :param text: Raw input answer text.
-        :return: Preprocessed text.
-        """
-        return text.strip()
-    def _dispatch(self, result_data: dict) -> None:
-        """
-        Sends the generated question and original answer to any registered result handlers.
-        :param result_data: A dictionary containing the results (e.g., {"original_answer": ..., "generated_question": ...}).
-        """
-        for handler in self.handlers:
-            try:
-                handler.handle(result_data)
-            except Exception:
-                logging.error(
-                    f"Handler {handler.__class__.__name__} failed", exc_info=True
-                )
-class BaseQuestionGeneratorFromSubject(ABC):
-    """
-    Base class for all systems that generate a question from a given subject
-    it will curate some number of questions
-    """
-    def __init__(self, handlers: Optional[list[Any]] = None):
-        """
-        Initializes the BaseQuestionGeneratorFromSubject with optional result handlers.
-        :param handlers: Optional list of handlers to process the generation results.
-        """
-        self.handlers = handlers or []
-    @abstractmethod
-    def generate_question(self, subject: str) -> str:
-        """
-        Generates an appropriate question for the provided answer.
-        :param answer: The answer string for which a question needs to be generated.
-        :return: The generated question string.
-        """
-        pass
-    def preprocess(self, text: str) -> str:
-        """
-        Optional: Preprocess the input answer text before question generation.
-        :param text: Raw input answer text.
-        :return: Preprocessed text.
-        """
-        return text.strip()
-    def _dispatch(self, result_data: dict) -> None:
-        """
-        Sends the generated question and original answer to any registered result handlers.
-        :param result_data: A dictionary containing the results (e.g., {"original_answer": ..., "generated_question": ...}).
-        """
-        for handler in self.handlers:
-            try:
-                handler.handle(result_data)
-            except Exception:
-                logging.error(
-                    f"Handler {handler.__class__.__name__} failed", exc_info=True
-                )

texttools/base/base_question_merger.py DELETED Viewed

@@ -1,59 +0,0 @@
-import logging
-from abc import ABC, abstractmethod
-from enum import Enum
-from typing import Any, Optional
-class MergingMode(Enum):
-    """
-    Defines the two modes for question merging.
-    """
-    DEFAULT_MODE = "immediate merging"
-    REASON_MODE = "merging with reasoning"
-class BaseQuestionsMerger(ABC):
-    """
-    Base class for all systems that merges more that one question with preserving the contents.
-    """
-    def __init__(self, handlers: Optional[list[Any]] = None):
-        """
-        Initializes the BaseQuestionsMerger with optional result handlers.
-        :param handlers: Optional list of handlers to process the merged results.
-        """
-        self.handlers = handlers or []
-    @abstractmethod
-    def merging_question(self, questions: list[str], mode: MergingMode) -> str:
-        """
-        merges the input questions based on the specified mode.
-        :param question: The original questions' string as a list.
-        :param mode: The MergingMode indicating how the questions should be merged.
-        :return: The rephrased and merged question string.
-        """
-        pass
-    def preprocess(self, text: str) -> str:
-        """
-        Optional: Preprocess the input questions' text before merging.
-        :param text: Raw input question's texts.
-        :return: Preprocessed text.
-        """
-        return text.strip()
-    def _dispatch(self, result_data: dict) -> None:
-        """
-        Sends the merged question and original questions to any registered result handlers.
-        :param result_data: A dictionary containing the results (e.g., {"original_question": ..., "rewritten_question": ..., "mode": ...}).
-        """
-        for handler in self.handlers:
-            try:
-                handler.handle(result_data)
-            except Exception:
-                logging.error(
-                    f"Handler {handler.__class__.__name__} failed", exc_info=True
-                )

texttools/base/base_question_rewriter.py DELETED Viewed

@@ -1,61 +0,0 @@
-import logging
-from abc import ABC, abstractmethod
-from enum import Enum
-from typing import Any, Optional
-class RewriteMode(Enum):
-    """
-    Defines the two modes for question rewriting.
-    """
-    SAME_MEANING_DIFFERENT_WORDING = "same_meaning_different_wording"
-    DIFFERENT_MEANING_SIMILAR_WORDING = "different_meaning_similar_wording"
-class BaseQuestionRewriter(ABC):
-    """
-    Base class for all systems that rewrite a question with different wording.
-    """
-    def __init__(self, handlers: Optional[list[Any]] = None):
-        """
-        Initializes the BaseQuestionRewriter with optional result handlers.
-        :param handlers: Optional list of handlers to process the rewriting results.
-        """
-        self.handlers = handlers or []
-    @abstractmethod
-    def rewrite_question(self, question: str, mode: RewriteMode) -> str:
-        """
-        Rewrites the input question based on the specified mode.
-        :param question: The original question string.
-        :param mode: The RewriteMode indicating how the question should be rewritten.
-        :return: The rephrased question string.
-        """
-        pass
-    def preprocess(self, text: str) -> str:
-        """
-        Optional: Preprocess the input question text before rewriting.
-        :param text: Raw input question text.
-        :return: Preprocessed text.
-        """
-        return text.strip()
-    def _dispatch(self, result_data: dict) -> None:
-        """
-        Sends the rewritten question and original question to any registered result handlers.
-        :param result_data: A dictionary containing the results (e.g., {"original_question": ..., "rewritten_question": ..., "mode": ...}).
-        """
-        for handler in self.handlers:
-            try:
-                handler.handle(result_data)
-            except Exception:
-                logging.error(
-                    f"Handler {handler.__class__.__name__} failed", exc_info=True
-                )

texttools/base/base_router.py DELETED Viewed

@@ -1,33 +0,0 @@
-from abc import ABC, abstractmethod
-from typing import Optional
-from texttools.handlers import NoOpResultHandler, ResultHandler
-class BaseRouter(ABC):
-    def __init__(self, handlers: Optional[list[ResultHandler]] = None):
-        """
-        Base class for routers
-        :param handlers: Optional list of handlers to process the summarization result.
-        """
-        self.handlers = handlers or [NoOpResultHandler()]
-    @abstractmethod
-    def route(self, text: str) -> str:
-        """
-        decides and classifies the inputted text between the choices that it has
-        :param text: The text to summarize.
-        :return: A route for the given text.
-        """
-        pass
-    def preprocess(self, text: str) -> str:
-        """
-        Optional: Preprocess the input text before summarization.
-        :param text: Raw input text.
-        :return: Preprocessed text.
-        """
-        return text.strip()

texttools/base/base_summarizer.py DELETED Viewed

@@ -1,55 +0,0 @@
-import logging
-from abc import ABC, abstractmethod
-from typing import Optional
-from texttools.handlers import NoOpResultHandler, ResultHandler
-class BaseSummarizer(ABC):
-    def __init__(self, handlers: Optional[list[ResultHandler]] = None):
-        """
-        Base class for text summarization.
-        :param handlers: Optional list of handlers to process the summarization result.
-        """
-        self.handlers = handlers or [NoOpResultHandler()]
-    @abstractmethod
-    def summarize(self, text: str) -> str:
-        """
-        Generate a summary for the input text.
-        :param text: The text to summarize.
-        :return: A summary string.
-        """
-        pass
-    def preprocess(self, text: str) -> str:
-        """
-        Optional: Preprocess the input text before summarization.
-        :param text: Raw input text.
-        :return: Preprocessed text.
-        """
-        return text
-    def _dispatch(self, summary: str, original_text: Optional[str] = None) -> None:
-        """
-        Send the summary result to any registered result handlers.
-        :param summary: The generated summary.
-        :param original_text: Optionally pass the original text.
-        """
-        result_data = {
-            "summary": summary,
-        }
-        if original_text is not None:
-            result_data["original_text"] = original_text
-        for handler in self.handlers:
-            try:
-                handler.handle(result_data)
-            except Exception:
-                logging.error(
-                    f"Handler {handler.__class__.__name__} failed", exc_info=True
-                )

texttools/base/base_task_performer.py DELETED Viewed

@@ -1,53 +0,0 @@
-import logging
-from abc import ABC, abstractmethod
-from typing import Any, Optional
-class BaseTaskPerformer(ABC):
-    """
-    Base class for common functionalities of LLM-based task performers.
-    This includes features like text preprocessing and dispatching results
-    to registered handlers.
-    """
-    def __init__(self, handlers: Optional[list[Any]] = None):
-        """
-        Initializes the BaseTaskPerformer with optional result handlers.
-        :param handlers: An optional list of handlers to process the component's results.
-        """
-        self.handlers = handlers or []
-    def _preprocess(self, text: str) -> str:
-        """
-        Preprocesses input text by stripping leading/trailing whitespace.
-        This can be extended for more complex preprocessing if needed.
-        :param text: The raw input text.
-        :return: The preprocessed text.
-        """
-        return text.strip()
-    @abstractmethod
-    def perform(self, *args, **kwargs) -> Any:
-        """
-        Abstract method to be implemented by concrete task performers.
-        This method will execute the primary task of the class (e.g., scoring, sorting).
-        The signature of args and kwargs will vary based on the specific task.
-        """
-        pass
-    def _dispatch(self, result_data: dict[str, Any]) -> None:
-        """
-        Dispatches the component's results to any registered result handlers.
-        Each handler receives a dictionary of result data.
-        :param result_data: A dictionary containing the results specific to the component.
-        """
-        for handler in self.handlers:
-            try:
-                handler.handle(result_data)
-            except Exception as e:
-                logging.error(
-                    f"Handler {handler.__class__.__name__} failed: {e}", exc_info=True
-                )

texttools/base/base_translator.py DELETED Viewed

@@ -1,38 +0,0 @@
-from abc import ABC, abstractmethod
-from typing import Any, Optional
-class BaseTranslator(ABC):
-    """
-    Base class for all translators that  output a translated string.
-    """
-    def __init__(
-        self,
-        handlers: Optional[list[Any]] = None,
-    ):
-        self.handlers = handlers or []
-    @abstractmethod
-    def translate(
-        self, text: str, target_language: str, source_language: Optional[str] = None
-    ) -> str:
-        """
-        Translate the input text from the source language to the target language.
-        Should return the translated string.
-        The source_language can be optional if the LLM can detect it automatically.
-        """
-        pass
-    def preprocess(self, text: str) -> str:
-        """
-        Optional text preprocessing step.
-        """
-        return text.strip()
-    def _dispatch(self, result: dict) -> None:
-        """
-        Dispatch the result to handlers.
-        """
-        for handler in self.handlers:
-            handler.handle(result)

texttools/formatter/__init__.py DELETED Viewed

@@ -1 +0,0 @@
-from .gemma3_formatter import Gemma3Formatter

texttools/formatter/base.py DELETED Viewed

@@ -1,26 +0,0 @@
-from abc import ABC, abstractmethod
-from typing import Any, Optional
-class ChatFormatter(ABC):
-    """
-    Given (raw_text, reason, maybe other hints), produce whatever payload
-    A) single string prompt (for providers that don't support multiple messages), or
-    B) list of {role, content} dicts, or
-    C) whatever shape the provider needs.
-    """
-    @abstractmethod
-    def format(
-        self,
-        text: str,
-        reason: Optional[str],
-        schema_instr: str,
-        prompt_template: Optional[str],
-    ) -> Any:
-        """
-        - For an OpenAI-style API, this might return list[{"role": "user"/"assistant", "content": "…"}].
-        - For a one-shot “text-only” API, this might return a single string combining everything.
-        - For some niche service, it might return JSON: {"inputs": […], "parameters": {…}}.
-        """
-        pass

texttools/formatter/gemma3_formatter.py DELETED Viewed

@@ -1,54 +0,0 @@
-from texttools.formatter.base import ChatFormatter
-class Gemma3Formatter(ChatFormatter):
-    """
-    Formatter that merges consecutive user messages (strings) with '\n'
-    and leaves assistant messages alone. No image‐handling, no extra tokens.
-    """
-    ROLE = "role"
-    CONTENT = "content"
-    USER_ROLE = "user"
-    ASSISTANT_ROLE = "assistant"
-    VALID_ROLES = {USER_ROLE, ASSISTANT_ROLE}
-    VALID_KEYS = {ROLE, CONTENT}
-    def format(self, messages: list[dict[str, str]]) -> list[dict[str, str]]:
-        """
-        :param messages: list of {"role": ..., "content": ...}, where role is "user", "assistant", or "system"
-        :return: a new list where consecutive "user" messages are merged into single entries
-        """
-        merged: list[dict[str, str]] = []
-        for message in messages:
-            # Validate keys strictly
-            if set(message.keys()) != self.VALID_KEYS:
-                raise ValueError(
-                    f"Message dict keys must be exactly {self.VALID_KEYS}, got {set(message.keys())}"
-                )
-            role, content = message[self.ROLE], message[self.CONTENT].strip()
-            # Replace "system" role with "user" role
-            if role == "system":
-                role = self.USER_ROLE
-            # Raise value error if message["role"] wan't a valid role
-            if role not in self.VALID_ROLES:
-                raise ValueError(f"Unexpected role: {role}")
-            # Merge with previous user turn
-            if (
-                merged
-                and role == self.USER_ROLE
-                and merged[-1][self.ROLE] == self.USER_ROLE
-            ):
-                merged[-1][self.CONTENT] += "\n" + content
-            # Otherwise, start a new turn
-            else:
-                merged.append({self.ROLE: role, self.CONTENT: content})
-        return merged

texttools/handlers/__init__.py DELETED Viewed

@@ -1,6 +0,0 @@
-from .handlers import (
-    NoOpResultHandler,
-    PrintResultHandler,
-    ResultHandler,
-    SaveToFileResultHandler,
-)

texttools/handlers/categorizer/__init__.py DELETED Viewed

@@ -1,6 +0,0 @@
-from .categorizer import (
-    ResultHandler,
-    NoOpResultHandler,
-    PrintResultHandler,
-    SaveToElasticResultHandler,
-)

hamtaa-texttools 0.1.44__py3-none-any.whl → 1.0.0__py3-none-any.whl

Potentially problematic release.

hamtaa-texttools 0.1.44__py3-none-any.whl → 1.0.0__py3-none-any.whl