beekeeper-core 1.0.1__py3-none-any.whl → 1.0.10__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- beekeeper/core/document/base.py +1 -1
- beekeeper/core/embeddings/base.py +49 -7
- beekeeper/core/evaluation/context_similarity.py +14 -14
- beekeeper/core/flows/ingestion_flow.py +16 -16
- beekeeper/core/guardrails/__init__.py +4 -0
- beekeeper/core/guardrails/base.py +15 -0
- beekeeper/core/guardrails/types.py +13 -0
- beekeeper/core/llms/__init__.py +1 -3
- beekeeper/core/llms/base.py +14 -10
- beekeeper/core/llms/decorators.py +80 -1
- beekeeper/core/llms/types.py +4 -0
- beekeeper/core/monitors/__init__.py +3 -0
- beekeeper/core/monitors/base.py +36 -0
- beekeeper/core/monitors/types.py +11 -0
- beekeeper/core/observers/__init__.py +2 -2
- beekeeper/core/observers/base.py +25 -28
- beekeeper/core/prompts/__init__.py +1 -1
- beekeeper/core/prompts/base.py +17 -5
- beekeeper/core/readers/base.py +1 -7
- beekeeper/core/readers/directory.py +5 -5
- beekeeper/core/text_chunkers/base.py +22 -5
- beekeeper/core/text_chunkers/semantic.py +13 -13
- beekeeper/core/text_chunkers/sentence.py +15 -15
- beekeeper/core/text_chunkers/token.py +15 -15
- beekeeper/core/tools/base.py +1 -1
- beekeeper/core/vector_stores/base.py +11 -2
- {beekeeper_core-1.0.1.dist-info → beekeeper_core-1.0.10.dist-info}/METADATA +7 -7
- beekeeper_core-1.0.10.dist-info/RECORD +43 -0
- beekeeper_core-1.0.1.dist-info/RECORD +0 -37
- {beekeeper_core-1.0.1.dist-info → beekeeper_core-1.0.10.dist-info}/WHEEL +0 -0

beekeeper/core/document/base.py
CHANGED

@@ -8,7 +8,7 @@ from pydantic.v1 import BaseModel, Field, validator
 
 
 class BaseDocument(ABC, BaseModel):
-    """
+    """Abstract base class defining the interface for retrievable documents."""
 
     id_: str = Field(
         default_factory=lambda: str(uuid.uuid4()),

beekeeper/core/embeddings/base.py
CHANGED

@@ -1,11 +1,12 @@
 from abc import ABC, abstractmethod
 from enum import Enum
-from typing import List
+from typing import List, Union
 
 import numpy as np
 from beekeeper.core.document import Document
 from beekeeper.core.schema import TransformerComponent
 from beekeeper.core.utils.pairwise import cosine_similarity
+from deprecated import deprecated
 
 Embedding = List[float]
 
@@ -35,23 +36,64 @@ def similarity(
 
 
 class BaseEmbedding(TransformerComponent, ABC):
-    """
+    """Abstract base class defining the interface for embedding models."""
 
     @classmethod
     def class_name(cls) -> str:
         return "BaseEmbedding"
 
     @abstractmethod
+    def embed_text(
+        self, input: Union[str, List[str]]
+    ) -> Union[Embedding, List[Embedding]]:
+        """Embed one or more text strings."""
+
+    def embed_documents(self, documents: List[Document]) -> List[Document]:
+        """
+        Embed a list of documents and assign the computed embeddings to the 'embedding' attribute.
+
+        Args:
+            documents (List[Document]): List of documents to compute embeddings.
+        """
+        texts = [document.get_content() for document in documents]
+        embeddings = self.embed_text(texts)
+
+        for document, embedding in zip(documents, embeddings):
+            document.embedding = embedding
+
+        return documents
+
+    @deprecated(
+        reason="'embed_texts()' is deprecated and will be removed in a future version. Use 'embed_text()' instead.",
+        version="1.0.3",
+        action="always",
+    )
+    def embed_texts(self, texts: List[str]) -> List[Embedding]:
+        return self.embed_text(texts)
+
+    @deprecated(
+        reason="'get_text_embedding()' is deprecated and will be removed in a future version. Use 'embed_text()' instead.",
+        version="1.0.2",
+        action="always",
+    )
     def get_text_embedding(self, query: str) -> Embedding:
-
+        return self.embed_text(query)
 
-    @
+    @deprecated(
+        reason="'get_texts_embedding()' is deprecated and will be removed in a future version. Use 'embed_texts()' instead.",
+        version="1.0.2",
+        action="always",
+    )
     def get_texts_embedding(self, texts: List[str]) -> List[Embedding]:
-
+        return self.embed_texts(texts)
 
-    @
+    @deprecated(
+        reason="'get_documents_embedding()' is deprecated and will be removed in a future version. Use 'embed_documents()' instead.",
+        version="1.0.2",
+        action="always",
+    )
     def get_documents_embedding(self, documents: List[Document]) -> List[Document]:
-
+        return self.embed_documents(documents)
 
     @staticmethod
     def similarity(
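
The practical effect of this change is an API consolidation: `embed_text()` now accepts either a single string or a list of strings, while `embed_texts()`, `get_text_embedding()`, `get_texts_embedding()` and `get_documents_embedding()` become thin deprecated wrappers. A minimal migration sketch, assuming the HuggingFace embedding provider referenced in the package's own docstring examples is installed:

```python
# Migration sketch based on the deprecations above; the provider is an assumption taken
# from the package's docstring examples, not from this diff itself.
from beekeeper.embeddings.huggingface import HuggingFaceEmbedding

embedding = HuggingFaceEmbedding()

# Deprecated since 1.0.2/1.0.3 -- still works, but emits a DeprecationWarning:
single = embedding.get_text_embedding("Beekeeper is a data framework.")
batch = embedding.embed_texts(["first text", "second text"])

# New unified entry point -- accepts a single string or a list of strings:
single = embedding.embed_text("Beekeeper is a data framework.")
batch = embedding.embed_text(["first text", "second text"])
```
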

beekeeper/core/evaluation/context_similarity.py
CHANGED

@@ -10,7 +10,7 @@ class ContextSimilarityEvaluator(BaseModel):
     Measures how much context has contributed to the answer’s.
     A higher value suggests a greater proportion of the context is present in the LLM's response.
 
-
+    Attributes:
         embed_model (BaseEmbedding): The embedding model used to compute vector representations.
         similarity_mode (str, optional): Similarity strategy to use. Supported options are
            `"cosine"`, `"dot_product"`, and `"euclidean"`. Defaults to `"cosine"`.
@@ -18,13 +18,13 @@ class ContextSimilarityEvaluator(BaseModel):
            whether a context segment "passes". Defaults to `0.8`.
 
     Example:
-
+        ```python
+        from beekeeper.core.evaluation import ContextSimilarityEvaluator
+        from beekeeper.embeddings.huggingface import HuggingFaceEmbedding
 
-
-
-
-        embedding = HuggingFaceEmbedding()
-        ctx_sim_evaluator = ContextSimilarityEvaluator(embed_model=embedding)
+        embedding = HuggingFaceEmbedding()
+        ctx_sim_evaluator = ContextSimilarityEvaluator(embed_model=embedding)
+        ```
     """
 
     embed_model: BaseEmbedding
@@ -41,11 +41,11 @@ class ContextSimilarityEvaluator(BaseModel):
            generated_text (str): LLM response based on given context.
 
         Example:
-
-
-
-
-
+            ```python
+            evaluation_result = ctx_sim_evaluator.evaluate(
+                contexts=[], generated_text="<candidate>"
+            )
+            ```
         """
         if not contexts or not generated_text:
             raise ValueError(
@@ -53,10 +53,10 @@ class ContextSimilarityEvaluator(BaseModel):
             )
 
         evaluation_result = {"contexts_score": [], "score": 0}
-        candidate_embedding = self.embed_model.
+        candidate_embedding = self.embed_model.embed_text(generated_text)
 
         for context in contexts:
-            context_embedding = self.embed_model.
+            context_embedding = self.embed_model.embed_text(context)
            evaluation_result["contexts_score"].append(
                self.embed_model.similarity(
                    candidate_embedding,

beekeeper/core/flows/ingestion_flow.py
CHANGED

@@ -22,7 +22,7 @@ class IngestionFlow:
     """
     An ingestion flow for processing and storing data.
 
-
+    Attributes:
        transformers (List[TransformerComponent]): A list of transformer components applied to the input documents.
        doc_strategy (DocStrategy): The strategy used for handling document duplicates.
            Defaults to `DocStrategy.DUPLICATE_ONLY`.
@@ -32,18 +32,18 @@ class IngestionFlow:
        vector_store (BaseVectorStore, optional): Vector store for saving processed documents
 
     Example:
-
-
-
-
-
-
-
-
-
-
-
-
+        ```python
+        from beekeeper.core.flows import IngestionFlow
+        from beekeeper.core.text_chunkers import TokenTextChunker
+        from beekeeper.embeddings.huggingface import HuggingFaceEmbedding
+
+        ingestion_flow = IngestionFlow(
+            transformers=[
+                TokenTextChunker(),
+                HuggingFaceEmbedding(model_name="intfloat/multilingual-e5-small"),
+            ]
+        )
+        ```
     """
 
     def __init__(
@@ -139,9 +139,9 @@ class IngestionFlow:
            documents: Set of documents to be transformed.
 
         Example:
-
-
-
+            ```python
+            ingestion_flow.run(documents: List[Document])
+            ```
         """
         documents_processed = []
         input_documents = self._read_documents(documents)

beekeeper/core/guardrails/base.py
ADDED

@@ -0,0 +1,15 @@
+from abc import ABC, abstractmethod
+
+from beekeeper.core.guardrails.types import GuardrailResponse
+
+
+class BaseGuardrail(ABC):
+    """Abstract base class defining the interface for LLMs."""
+
+    @classmethod
+    def class_name(cls) -> str:
+        return "BaseGuardrail"
+
+    @abstractmethod
+    def enforce(self, text: str, direction: str) -> GuardrailResponse:
+        """Runs policies enforcement to specified guardrail."""

beekeeper/core/guardrails/types.py
ADDED

@@ -0,0 +1,13 @@
+from typing import Any, Optional
+
+from pydantic import BaseModel, Field
+
+
+class GuardrailResponse(BaseModel):
+    """Guardrail response."""
+
+    text: str = Field(..., description="Generated text response")
+    action: Optional[str] = Field(
+        default=None, description="Action taken by the guardrail"
+    )
+    raw: Optional[Any] = Field(default=None)
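
Together these two new files define the guardrail contract: a concrete guardrail implements `enforce(text, direction)` and returns a `GuardrailResponse`. A minimal sketch of a hypothetical implementation; the class name and the keyword-blocking rule are illustrative, not part of the package:

```python
# Hypothetical guardrail built on the new ABC; only the enforce() contract comes from the diff.
from beekeeper.core.guardrails.base import BaseGuardrail
from beekeeper.core.guardrails.types import GuardrailResponse


class KeywordBlockGuardrail(BaseGuardrail):
    """Blocks any text containing one of the configured keywords (illustrative rule)."""

    def __init__(self, blocked: list[str]) -> None:
        self.blocked = [word.lower() for word in blocked]

    def enforce(self, text: str, direction: str) -> GuardrailResponse:
        if any(word in text.lower() for word in self.blocked):
            return GuardrailResponse(text="[blocked]", action="block", raw={"direction": direction})
        return GuardrailResponse(text=text, action=None, raw={"direction": direction})


guardrail = KeywordBlockGuardrail(blocked=["secret"])
print(guardrail.enforce("tell me the secret key", direction="input").action)  # -> "block"
```
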
beekeeper/core/llms/__init__.py
CHANGED
beekeeper/core/llms/base.py
CHANGED

@@ -2,32 +2,36 @@ from abc import ABC, abstractmethod
 from typing import Any, List, Optional
 
 from beekeeper.core.llms.types import ChatMessage, ChatResponse, GenerateResponse
-from beekeeper.core.
+from beekeeper.core.monitors import BaseMonitor
 from pydantic import BaseModel
 
 
 class BaseLLM(ABC, BaseModel):
-    """
+    """Abstract base class defining the interface for LLMs."""
 
     model_config = {"arbitrary_types_allowed": True}
-    callback_manager: Optional[
+    callback_manager: Optional[BaseMonitor] = None
 
     @classmethod
     def class_name(cls) -> str:
         return "BaseLLM"
 
-    def
-    """
-
+    def text_completion(self, prompt: str, **kwargs: Any) -> str:
+        """
+        Generates a chat completion for LLM. Using OpenAI's standard endpoint (/completions).
+
+        Args:
+            prompt (str): The input prompt to generate a completion for.
+            **kwargs (Any): Additional keyword arguments to customize the LLM completion request.
+        """
+        response = self.completion(prompt=prompt, **kwargs)
+
+        return response.text
 
     @abstractmethod
     def completion(self, prompt: str, **kwargs: Any) -> GenerateResponse:
         """Generates a completion for LLM."""
 
-    @abstractmethod
-    def text_completion(self, prompt: str, **kwargs: Any) -> str:
-        """Generates a text completion for LLM."""
-
     @abstractmethod
     def chat_completion(
         self, messages: List[ChatMessage], **kwargs: Any
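
`text_completion()` is now a concrete helper that delegates to the abstract `completion()` and returns `response.text`, so providers only implement `completion()` and `chat_completion()`. A minimal sketch of a hypothetical subclass, assuming those two are the only abstract methods and that `GenerateResponse` can be built with a `text` keyword (both inferred from this hunk, not confirmed elsewhere):

```python
# Hypothetical EchoLLM; the GenerateResponse(text=...) construction is an assumption
# inferred from text_completion() reading `response.text` in the hunk above.
from typing import Any, List

from beekeeper.core.llms.base import BaseLLM
from beekeeper.core.llms.types import ChatMessage, ChatResponse, GenerateResponse


class EchoLLM(BaseLLM):
    """Toy LLM that uppercases its prompt; a real provider would call an API here."""

    def completion(self, prompt: str, **kwargs: Any) -> GenerateResponse:
        return GenerateResponse(text=prompt.upper())

    def chat_completion(self, messages: List[ChatMessage], **kwargs: Any) -> ChatResponse:
        raise NotImplementedError  # a real provider would build a ChatResponse here


llm = EchoLLM()
print(llm.text_completion("hello bees"))  # inherited helper -> "HELLO BEES"
```
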

beekeeper/core/llms/decorators.py
CHANGED

@@ -5,11 +5,17 @@ from logging import getLogger
 from typing import Callable
 
 from beekeeper.core.llms.types import ChatMessage
-from beekeeper.core.
+from beekeeper.core.monitors.types import PayloadRecord
+from deprecated import deprecated
 
 logger = getLogger(__name__)
 
 
+@deprecated(
+    reason="'llm_chat_observer()' is deprecated and will be removed in a future version. Use 'llm_chat_monitor()'.",
+    version="1.0.8",
+    action="always",
+)
 def llm_chat_observer() -> Callable:
     """
     Decorator to wrap a method with llm handler logic.
@@ -81,3 +87,76 @@ def llm_chat_observer() -> Callable:
         return async_wrapper
 
     return decorator
+
+
+def llm_chat_monitor() -> Callable:
+    """
+    Decorator to wrap a method with llm handler logic.
+    Looks for observability instances in `self.callback_manager`.
+    """
+
+    def decorator(f: Callable) -> Callable:
+        def async_wrapper(self, *args, **kwargs):
+            callback_manager_fns = getattr(self, "callback_manager", None)
+
+            start_time = time.time()
+            llm_return_val = f(self, *args, **kwargs)
+            response_time = int((time.time() - start_time) * 1000)
+
+            if callback_manager_fns:
+
+                def async_callback_thread():
+                    try:
+                        # Extract input messages
+                        if len(args) > 0 and isinstance(args[0], ChatMessage):
+                            input_chat_messages = args[0]
+                        elif "messages" in kwargs:
+                            input_chat_messages = kwargs["messages"]
+                        else:
+                            raise ValueError(
+                                "No messages provided in positional or keyword arguments"
+                            )
+
+                        # Get the user's latest message after each interaction to chat observability.
+                        user_messages = [
+                            msg for msg in input_chat_messages if msg.role == "user"
+                        ]
+                        last_user_message = (
+                            user_messages[-1].content if user_messages else None
+                        )
+
+                        # Get the system/instruct (first) message to chat observability.
+                        system_messages = [
+                            msg for msg in input_chat_messages if msg.role == "system"
+                        ]
+                        system_message = (
+                            system_messages[0].content if system_messages else None
+                        )
+
+                        callback = callback_manager_fns(
+                            payload=PayloadRecord(
+                                input_text=(system_message or "") + last_user_message,
+                                generated_text=llm_return_val.message.content,
+                                generated_token_count=llm_return_val.raw["usage"][
+                                    "completion_tokens"
+                                ],
+                                input_token_count=llm_return_val.raw["usage"][
+                                    "prompt_tokens"
+                                ],
+                                response_time=response_time,
+                            )
+                        )
+
+                        if asyncio.iscoroutine(callback):
+                            asyncio.run(callback)
+
+                    except Exception as e:
+                        logger.error(f"Observability callback error: {e}")
+
+                threading.Thread(target=async_callback_thread).start()
+
+            return llm_return_val
+
+        return async_wrapper
+
+    return decorator
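
The new `llm_chat_monitor()` times the wrapped call and reports a `PayloadRecord` to `self.callback_manager` on a background thread. A sketch of how the pieces fit together, assuming the behaviour shown in the hunk above; the stand-in response object and class names are illustrative, and attribute access on `PayloadRecord` is assumed to mirror the keyword arguments it is built with:

```python
# Illustrative only: a stand-in "LLM" whose response mimics the attributes the decorator
# reads (.message.content and .raw["usage"]); a real BaseLLM subclass returns a ChatResponse.
from types import SimpleNamespace
from typing import List

from beekeeper.core.llms.decorators import llm_chat_monitor
from beekeeper.core.llms.types import ChatMessage
from beekeeper.core.monitors import PromptMonitor
from beekeeper.core.monitors.types import PayloadRecord


class PrintMonitor(PromptMonitor):
    """Hypothetical monitor that just prints each payload it receives."""

    def __call__(self, payload: PayloadRecord) -> None:
        print(f"{payload.input_text!r} -> {payload.generated_text!r} ({payload.response_time} ms)")


class FakeChatLLM:
    def __init__(self) -> None:
        self.callback_manager = PrintMonitor()  # the decorator looks this attribute up via getattr

    @llm_chat_monitor()
    def chat_completion(self, messages: List[ChatMessage], **kwargs):
        return SimpleNamespace(
            message=SimpleNamespace(content="pong"),
            raw={"usage": {"completion_tokens": 1, "prompt_tokens": 2}},
        )


FakeChatLLM().chat_completion(messages=[ChatMessage(content="ping")])
```
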
beekeeper/core/llms/types.py
CHANGED

@@ -18,6 +18,10 @@ class ChatMessage(BaseModel):
     role: MessageRole = Field(default=MessageRole.USER)
     content: Optional[str] = Field(default=None)
 
+    def to_dict(self) -> dict:
+        """Convert ChatMessage to dict."""
+        return self.model_dump(exclude_none=True)
+
 
 class GenerateResponse(BaseModel):
     """Generate response."""
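
A short usage note for the new helper, assuming pydantic v2 `model_dump` semantics (fields left at `None` are dropped because of `exclude_none=True`):

```python
from beekeeper.core.llms.types import ChatMessage

message = ChatMessage(content="What is Beekeeper?")  # role defaults to MessageRole.USER
print(message.to_dict())  # dict with "role" and "content"; None-valued fields are omitted
```
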

beekeeper/core/monitors/base.py
ADDED

@@ -0,0 +1,36 @@
+from abc import ABC, abstractmethod
+from typing import Optional
+
+from beekeeper.core.monitors.types import PayloadRecord
+from beekeeper.core.prompts import PromptTemplate
+
+
+class BaseMonitor(ABC):
+    """Abstract base class defining the interface for monitors."""
+
+    @classmethod
+    def class_name(cls) -> str:
+        return "BaseMonitor"
+
+
+class PromptMonitor(BaseMonitor):
+    """Abstract base class defining the interface for prompt observability."""
+
+    def __init__(self, prompt_template: Optional[PromptTemplate] = None) -> None:
+        self.prompt_template = prompt_template
+
+    @classmethod
+    def class_name(cls) -> str:
+        return "PromptMonitor"
+
+    @abstractmethod
+    def __call__(self, payload: PayloadRecord) -> None:
+        """PromptMonitor."""
+
+
+class TelemetryMonitor(BaseMonitor):
+    """Abstract base class defining the interface for telemetry observability."""
+
+    @classmethod
+    def class_name(cls) -> str:
+        return "TelemetryMonitor"
@@ -1,3 +1,3 @@
|
|
|
1
|
-
from beekeeper.core.observers.base import BaseObserver, ModelObserver
|
|
1
|
+
from beekeeper.core.observers.base import BaseObserver, ModelObserver, PromptObserver
|
|
2
2
|
|
|
3
|
-
__all__ =
|
|
3
|
+
__all__ = ["BaseObserver", "ModelObserver", "PromptObserver"]
|
beekeeper/core/observers/base.py
CHANGED

@@ -1,36 +1,33 @@
-from
-from
+from beekeeper.core.monitors import BaseMonitor, PromptMonitor
+from deprecated import deprecated
 
-from beekeeper.core.observers.types import PayloadRecord
-from beekeeper.core.prompts import PromptTemplate
 
+class BaseObserver(BaseMonitor):
+    """DEPRECATED: An interface for observability."""
 
-class BaseObserver(ABC):
-    """An interface for observability."""
 
-
-
-
+@deprecated(
+    reason="'PromptObserver()' is deprecated and will be removed in a future version. Use 'PromptMonitor()' from 'beekeeper.core.monitors' instead.",
+    version="1.0.4",
+    action="always",
+)
+class PromptObserver(PromptMonitor):
+    """DEPRECATED: Abstract base class defining the interface for prompt observability."""
 
 
-
-    "
+@deprecated(
+    reason="'ModelObserver()' is deprecated and will be removed in a future version. Use 'PromptMonitor()' from 'beekeeper.core.monitors' instead.",
+    version="1.0.3",
+    action="always",
+)
+class ModelObserver(PromptMonitor):
+    """DEPRECATED: This class is deprecated and kept only for backward compatibility."""
 
-    def __init__(self, prompt_template: Optional[PromptTemplate] = None) -> None:
-        self.prompt_template = prompt_template
 
-
-
-
-
-
-
-
-
-
-class TelemetryObserver(BaseObserver):
-    """An interface for telemetry observability."""
-
-    @classmethod
-    def class_name(cls) -> str:
-        return "TelemetryObserver"
+@deprecated(
+    reason="'TelemetryObserver()' is deprecated and will be removed in a future version. Use 'TelemetryMonitor()' from 'beekeeper.core.monitors' instead.",
+    version="1.0.4",
+    action="always",
+)
+class TelemetryObserver(BaseMonitor):
+    """DEPRECATED: Abstract base class defining the interface for telemetry observability."""
beekeeper/core/prompts/base.py
CHANGED

@@ -6,15 +6,15 @@ class PromptTemplate(BaseModel):
     """
     Prompt Template.
 
-
+    Attributes:
        template (str): Prompt template string.
 
     Example:
-
+        ```python
+        from beekeeper.core.prompts import PromptTemplate
 
-
-
-        PromptTemplate("Summarize the following text: {input_text}")
+        PromptTemplate("Summarize the following text: {input_text}")
+        ```
     """
 
     template: str
@@ -22,6 +22,18 @@ class PromptTemplate(BaseModel):
     def __init__(self, template: str):
         super().__init__(template=template)
 
+    @classmethod
+    def from_value(cls, value: str) -> "PromptTemplate":
+        if isinstance(value, cls):
+            return value
+
+        if isinstance(value, str):
+            return cls(value)
+
+        raise TypeError(
+            f"Invalid type for parameter 'prompt_template'. Expected str or PromptTemplate, but received {type(value).__name__}."
+        )
+
     def format(self, **kwargs):
         """
         Formats the template using the provided dynamic variables.
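
The new `from_value()` normalizes either a raw string or an existing `PromptTemplate` into a `PromptTemplate`, which pairs with the existing `format()` helper. A short sketch, assuming `format()` returns the rendered string:

```python
from beekeeper.core.prompts import PromptTemplate

template = PromptTemplate.from_value("Summarize the following text: {input_text}")
same = PromptTemplate.from_value(template)  # already a PromptTemplate -> returned as-is
print(template.format(input_text="Bees pollinate most flowering plants."))
# PromptTemplate.from_value(42) raises TypeError with the message shown in the diff.
```
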
beekeeper/core/readers/base.py
CHANGED

@@ -6,7 +6,7 @@ from pydantic.v1 import BaseModel
 
 
 class BaseReader(ABC, BaseModel):
-    """
+    """Abstract base class defining the interface for document reader."""
 
     @classmethod
     def class_name(cls) -> str:
@@ -15,9 +15,3 @@ class BaseReader(ABC, BaseModel):
     @abstractmethod
     def load_data(self) -> List[Document]:
         """Loads data."""
-
-    def load(self) -> List[Document]:
-        return self.load_data()
-
-    def lazy_load(self) -> List[Document]:
-        return self.load_data()

beekeeper/core/readers/directory.py
CHANGED

@@ -29,18 +29,18 @@ class DirectoryReader(BaseReader):
     Reads files from a directory, optionally filtering by file extension and
     allowing recursive directory traversal.
 
-
+    Attributes:
        required_exts (List[str], optional): List of file extensions to filter by.
            Only files with these extensions will be loaded. Defaults to `None` (no filtering).
        recursive (bool, optional): Whether to recursively search subdirectories for files.
            Defaults to `False`.
 
     Example:
-
+        ```python
+        from beekeeper.core.readers import DirectoryReader
 
-
-
-        directory_reader = DirectoryReader()
+        directory_reader = DirectoryReader()
+        ```
     """
 
     required_exts: List[str] = [".pdf", ".docx", ".html"]
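
Note that `load()` and `lazy_load()` were removed from `BaseReader` in this range, so readers are now driven solely through `load_data()`. A minimal migration sketch, assuming the no-argument construction shown in the `DirectoryReader` docstring above:

```python
from beekeeper.core.readers import DirectoryReader

reader = DirectoryReader()      # default required_exts per the field above; non-recursive
documents = reader.load_data()  # reader.load() / reader.lazy_load() no longer exist in 1.0.10
```
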

beekeeper/core/text_chunkers/base.py
CHANGED

@@ -3,22 +3,39 @@ from typing import List
 
 from beekeeper.core.document import Document
 from beekeeper.core.schema import TransformerComponent
+from deprecated import deprecated
 
 
 class BaseTextChunker(TransformerComponent, ABC):
-    """
+    """Abstract base class defining the interface for text chunker."""
 
     @classmethod
     def class_name(cls) -> str:
         return "BaseTextChunker"
 
     @abstractmethod
-    def
-    """
+    def chunk_text(self, text: str) -> List[str]:
+        """Split a single string of text into smaller chunks."""
 
     @abstractmethod
+    def chunk_documents(self, documents: List[Document]) -> List[Document]:
+        """Split a list of documents into smaller document chunks."""
+
+    @deprecated(
+        reason="'from_text()' is deprecated and will be removed in a future version. Use 'chunk_text' instead.",
+        version="1.0.2",
+        action="always",
+    )
+    def from_text(self, text: str) -> List[str]:
+        return self.chunk_text(text)
+
+    @deprecated(
+        reason="'from_documents()' is deprecated and will be removed in a future version. Use 'chunk_documents' instead.",
+        version="1.0.2",
+        action="always",
+    )
     def from_documents(self, documents: List[Document]) -> List[Document]:
-
+        return self.chunk_documents(documents)
 
     def __call__(self, documents: List[Document]) -> List[Document]:
-        return self.
+        return self.chunk_documents(documents)
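
The chunker interface mirrors the embedding change: `chunk_text()`/`chunk_documents()` are the new abstract methods, with `from_text()`/`from_documents()` kept as deprecated wrappers. A short migration sketch using the no-argument `TokenTextChunker` construction from the docstrings below:

```python
from beekeeper.core.text_chunkers import TokenTextChunker

chunker = TokenTextChunker()  # chunk_size=512, chunk_overlap=256 per the documented defaults

# Deprecated since 1.0.2 (still forwards to the new method):
chunks = chunker.from_text("Beekeeper is a data framework to load any data in one line of code.")

# New API:
chunks = chunker.chunk_text("Beekeeper is a data framework to load any data in one line of code.")
```
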

beekeeper/core/text_chunkers/semantic.py
CHANGED

@@ -14,9 +14,9 @@ class SemanticChunker(BaseTextChunker, BaseModel):
     Python class designed to split text into chunks using semantic understanding.
 
     Credit to Greg Kamradt's notebook:
-
+    [5 Levels Of Text Splitting](https://github.com/FullStackRetrieval-com/RetrievalTutorials/blob/main/tutorials/LevelsOfTextSplitting/5_Levels_Of_Text_Splitting.ipynb)
 
-
+    Attributes:
        embed_model (BaseEmbedding): Embedding model used for semantic chunking.
        buffer_size (int, optional): Number of sentences to group together. Default is `1`.
        breakpoint_threshold_amount (int, optional): Threshold percentage for detecting breakpoints between group of sentences.
@@ -24,13 +24,13 @@ class SemanticChunker(BaseTextChunker, BaseModel):
        device (str, optional): Device to use for processing. Currently supports "cpu" and "cuda". Default is `cpu`.
 
     Example:
-
+        ```python
+        from beekeeper.core.text_chunkers import SemanticChunker
+        from beekeeper.embeddings.huggingface import HuggingFaceEmbedding
 
-
-
-        embedding = HuggingFaceEmbedding()
-        text_chunker = SemanticChunker(embed_model=embedding)
+        embedding = HuggingFaceEmbedding()
+        text_chunker = SemanticChunker(embed_model=embedding)
+        ```
     """
 
     embed_model: BaseEmbedding
@@ -99,9 +99,9 @@ class SemanticChunker(BaseTextChunker, BaseModel):
 
        return [i for i, x in enumerate(distances) if x > distance_threshold]
 
-    def
+    def chunk_text(self, text: str) -> List[str]:
         """
-        Split text into chunks.
+        Split a single string of text into smaller chunks.
 
         Args:
            text (str): Input text to split.
@@ -133,9 +133,9 @@ class SemanticChunker(BaseTextChunker, BaseModel):
 
        return chunks
 
-    def
+    def chunk_documents(self, documents: List[Document]) -> List[Document]:
         """
-        Split documents into chunks.
+        Split a list of documents into smaller document chunks.
 
         Args:
            documents (List[Document]): List of `Document` objects to split.
@@ -146,7 +146,7 @@ class SemanticChunker(BaseTextChunker, BaseModel):
        chunks = []
 
        for document in documents:
-            texts = self.
+            texts = self.chunk_text(document.get_content())
            metadata = {**document.get_metadata()}
 
            for text in texts:

beekeeper/core/text_chunkers/sentence.py
CHANGED

@@ -18,17 +18,17 @@ class SentenceChunker(BaseTextChunker):
     Designed to split input text into smaller chunks, particularly useful for processing
     large documents or texts. Tries to keep sentences and paragraphs together.
 
-
+    Attributes:
        chunk_size (int, optional): Size of each chunk. Default is `512`.
        chunk_overlap (int, optional): Amount of overlap between chunks. Default is `256`.
        separator (str, optional): Separator used for splitting text. Default is `" "`.
 
     Example:
-
+        ```python
+        from beekeeper.core.text_chunkers import SentenceChunker
 
-
-
-        text_chunker = SentenceChunker()
+        text_chunker = SentenceChunker()
+        ```
     """
 
     def __init__(
@@ -53,9 +53,9 @@ class SentenceChunker(BaseTextChunker):
            split_by_char(),
        ]
 
-    def
+    def chunk_text(self, text: str) -> List[str]:
         """
-        Split text into chunks.
+        Split a single string of text into smaller chunks.
 
         Args:
            text (str): Input text to split.
@@ -64,19 +64,19 @@ class SentenceChunker(BaseTextChunker):
            List[str]: List of text chunks.
 
         Example:
-
-
-
-
-
+            ```python
+            chunks = text_chunker.chunk_text(
+                "Beekeeper is a data framework to load any data in one line of code and connect with AI applications."
+            )
+            ```
         """
        splits = self._split(text)
 
        return merge_splits(splits, self.chunk_size, self.chunk_overlap)
 
-    def
+    def chunk_documents(self, documents: List[Document]) -> List[Document]:
         """
-        Split documents into chunks.
+        Split a list of documents into smaller document chunks.
 
         Args:
            documents (List[Document]): List of `Document` objects to split.
@@ -87,7 +87,7 @@ class SentenceChunker(BaseTextChunker):
        chunks = []
 
        for document in documents:
-            texts = self.
+            texts = self.chunk_text(document.get_content())
            metadata = {**document.get_metadata()}
 
            for text in texts:

beekeeper/core/text_chunkers/token.py
CHANGED

@@ -16,17 +16,17 @@ class TokenTextChunker(BaseTextChunker):
     This is the simplest splitting method. Designed to split input text into smaller chunks
     by looking at word tokens.
 
-
+    Attributes:
        chunk_size (int, optional): Size of each chunk. Default is `512`.
        chunk_overlap (int, optional): Amount of overlap between chunks. Default is `256`.
        separator (str, optional): Separators used for splitting into words. Default is `\\n\\n`.
 
     Example:
-
+        ```python
+        from beekeeper.core.text_chunkers import TokenTextChunker
 
-
-
-        text_chunker = TokenTextChunker()
+        text_chunker = TokenTextChunker()
+        ```
     """
 
     def __init__(
@@ -48,9 +48,9 @@ class TokenTextChunker(BaseTextChunker):
 
        self._sub_split_fns = [split_by_char()]
 
-    def
+    def chunk_text(self, text: str) -> List[str]:
         """
-        Split text into chunks.
+        Split a single string of text into smaller chunks.
 
         Args:
            text (str): Input text to split.
@@ -59,19 +59,19 @@ class TokenTextChunker(BaseTextChunker):
            List[str]: List of text chunks.
 
         Example:
-
-
-
-
-
+            ```python
+            chunks = text_chunker.chunk_text(
+                "Beekeeper is a data framework to load any data in one line of code and connect with AI applications."
+            )
+            ```
         """
        splits = self._split(text)
 
        return merge_splits(splits, self.chunk_size, self.chunk_overlap)
 
-    def
+    def chunk_documents(self, documents: List[Document]) -> List[Document]:
         """
-        Split documents into chunks.
+        Split a list of documents into smaller document chunks.
 
         Args:
            documents (List[Document]): List of `Document` objects to split.
@@ -82,7 +82,7 @@ class TokenTextChunker(BaseTextChunker):
        chunks = []
 
        for document in documents:
-            texts = self.
+            texts = self.chunk_text(document.get_content())
            metadata = {**document.get_metadata()}
 
            for text in texts:
beekeeper/core/tools/base.py
CHANGED

beekeeper/core/vector_stores/base.py
CHANGED

@@ -2,10 +2,11 @@ from abc import ABC, abstractmethod
 from typing import List, Tuple
 
 from beekeeper.core.document import Document
+from deprecated import deprecated
 
 
 class BaseVectorStore(ABC):
-    """
+    """Abstract base class defining the interface for vector store."""
 
     @classmethod
     def class_name(cls) -> str:
@@ -16,8 +17,16 @@ class BaseVectorStore(ABC):
         """Add documents to vector store."""
 
     @abstractmethod
+    def query_documents(self, query: str, top_k: int = 4) -> List[Document]:
+        """Query for similar documents in the vector store based on the input query provided."""
+
+    @deprecated(
+        reason="'search_documents()' is deprecated and will be removed in a future version. Use 'query_documents' instead.",
+        version="1.0.2",
+        action="always",
+    )
     def search_documents(self, query: str, top_k: int = 4) -> List[Document]:
-
+        return self.query_documents(query, top_k)
 
     @abstractmethod
     def delete_documents(self, ids: List[str]) -> None:
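
`search_documents()` is now a deprecated alias that forwards to the new abstract `query_documents()`. A small sketch written against the new name; the `retrieve` helper is illustrative, and no concrete vector store is named in this diff:

```python
from typing import List

from beekeeper.core.document import Document
from beekeeper.core.vector_stores.base import BaseVectorStore


def retrieve(store: BaseVectorStore, query: str) -> List[Document]:
    # Works with any concrete BaseVectorStore; search_documents(query, top_k) still works
    # but emits a DeprecationWarning since 1.0.2.
    return store.query_documents(query, top_k=4)
```
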

{beekeeper_core-1.0.1.dist-info → beekeeper_core-1.0.10.dist-info}/METADATA
CHANGED

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: beekeeper-core
-Version: 1.0.
+Version: 1.0.10
 Summary: Load any data in one line of code and connect with AI applications
 Project-URL: Repository, https://github.com/beekeeper-ai/beekeeper
 Author-email: Leonardo Furnielis <leonardofurnielis@outlook.com>
@@ -9,14 +9,14 @@ Keywords: AI,LLM,QA,RAG,data,observability,retrieval,semantic-search
 Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
 Classifier: Topic :: Software Development :: Libraries :: Application Frameworks
 Classifier: Topic :: Software Development :: Libraries :: Python Modules
-Requires-Python: <
-Requires-Dist: deprecated<2.0.0,>=1.
-Requires-Dist: nltk<4.0.0,>=3.9.
+Requires-Python: <3.14,>=3.11
+Requires-Dist: deprecated<2.0.0,>=1.3.1
+Requires-Dist: nltk<4.0.0,>=3.9.2
 Requires-Dist: numpy<1.27.0,>=1.26.4
-Requires-Dist: pydantic<3.0.0,>=2.
-Requires-Dist: tiktoken<0.
+Requires-Dist: pydantic<3.0.0,>=2.12.5
+Requires-Dist: tiktoken<0.13.0,>=0.12.0
 Provides-Extra: dev
-Requires-Dist: ruff>=0.
+Requires-Dist: ruff>=0.14.9; extra == 'dev'
 Description-Content-Type: text/markdown
 
 # Beekeeper Core

beekeeper_core-1.0.10.dist-info/RECORD
ADDED

@@ -0,0 +1,43 @@
+beekeeper/core/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+beekeeper/core/schema.py,sha256=8OZkRVDry6nglof38w4AvWaRDpf0zOUtqGyfWBhj8lk,267
+beekeeper/core/document/__init__.py,sha256=nCN0CNee1v-28W5a26Ca6iAVefcr_KAieAH6QfN4eyw,144
+beekeeper/core/document/base.py,sha256=NBlyj8C0uvvsiElU1wFcXU27LOb3VP0U11nMgOR6cgY,2635
+beekeeper/core/embeddings/__init__.py,sha256=4AzGUtoL7wComtQ-bEVwzoMQgahBhpxcF_4M5rQ0ClQ,159
+beekeeper/core/embeddings/base.py,sha256=3r_gr9ceezuAXexkeS5Bq23778BeF7h--IDEvL5pLjE,3464
+beekeeper/core/evaluation/__init__.py,sha256=FyZGpbTXcIM3BynssiS6wUm2KZkMnLVmKF50D7iqkXM,135
+beekeeper/core/evaluation/context_similarity.py,sha256=kT1J3HUgF51HHQA5Sew9ahbSV_jhgbCBMRYqPLKlljQ,2690
+beekeeper/core/flows/__init__.py,sha256=v6VLJ309l5bHYcG1JLUu6_kRoOwIZazonH4-n_UQzYQ,91
+beekeeper/core/flows/ingestion_flow.py,sha256=lfZM6lHF9rBTviimSlptHm_1htaA5qLLhE-Sm_7fwGY,6110
+beekeeper/core/guardrails/__init__.py,sha256=onznwYWAyOaxOVeYZle7oDtj3QJODQvJHcf5td_laZg,169
+beekeeper/core/guardrails/base.py,sha256=T-Ywr80iTL0EFYarCymFEEI3QkMsrw27JVh_0407sEU,427
+beekeeper/core/guardrails/types.py,sha256=7sgw1S5BZY0OqO-n04pHXPU7sG-NEZJlQyIeb2Fsq9Q,359
+beekeeper/core/llms/__init__.py,sha256=PN-5Y_Km_l2vO8v9d7iJ6_5xPCZJBh8UzwqRvQZlmTo,250
+beekeeper/core/llms/base.py,sha256=jFU1om9Qk6KTIsZXeke7lMp--x009G6-fnM1615l2BQ,1292
+beekeeper/core/llms/decorators.py,sha256=wRYXlKD5Cc8k1qPGYEEv-RSJdoHj-MQqKuAAQzkN9Fc,6534
+beekeeper/core/llms/types.py,sha256=lWswZ_bJkPmoTeheWxB1-OnABTYIVAcASkZCwjaTLzE,847
+beekeeper/core/monitors/__init__.py,sha256=TvoiIUJtWRO_4zqCICsFaGl_v4Tpvft1M542Bi13pOI,112
+beekeeper/core/monitors/base.py,sha256=3ooSfgVpWoRLe2TqizHMRK_bI5C-sla57aYJ47FmIXM,980
+beekeeper/core/monitors/types.py,sha256=s-4tB8OdeaCUIRvi6FLuib2u4Yl9evqQdCundNREXQY,217
+beekeeper/core/observers/__init__.py,sha256=Z5sDAajai4QLdGIrjq-vr5eJEBhriMMCw5u46j6xHvA,149
+beekeeper/core/observers/base.py,sha256=y1SE_0WQusKhVomFuZCkk42Jb7r93ZS6r_j8vs_Y_r4,1203
+beekeeper/core/observers/types.py,sha256=s-4tB8OdeaCUIRvi6FLuib2u4Yl9evqQdCundNREXQY,217
+beekeeper/core/prompts/__init__.py,sha256=kFp2N5giNEMA4hc3eZqstqaZu0c0BRAnP0NuF5aUaqI,85
+beekeeper/core/prompts/base.py,sha256=Edh77DuYm8lDhJvHazC5hgaiQEit-R4M-TWLX8-gIU0,1106
+beekeeper/core/prompts/utils.py,sha256=Cqpefzzxd6DxPbOKVyUCsIs-ibBGKhYU6ppYqhPT9vM,1378
+beekeeper/core/readers/__init__.py,sha256=vPCmWmK92LYL-R0LFcPqjOKFHqxW0xUP5r6M9GNxoqY,157
+beekeeper/core/readers/base.py,sha256=46VRNkCmKP2RWJT1-kRTSHG9SjY1xbhKUy1a7-OrgPg,418
+beekeeper/core/readers/directory.py,sha256=IjFuqrBXonr5MhsRLVdTIi4U5I1xd1ZlD2Xh_F8F1h8,2873
+beekeeper/core/text_chunkers/__init__.py,sha256=RIqqTGgn2BhbIgDGmTETw65DAQeI5Zls_A5H29jZVgA,366
+beekeeper/core/text_chunkers/base.py,sha256=RB7Qb_tlsCx11Q5Dl9IQ0wUpMuxMq4DGEwpFnT7ZOQw,1413
+beekeeper/core/text_chunkers/semantic.py,sha256=Q2HXgveT0EC8GALHwOls8az2ctioKnRbrfYMhEC1JN8,5903
+beekeeper/core/text_chunkers/sentence.py,sha256=HOCPAGRCiYnZ5zt9LR-bhsvwBHJkAkKhs0e8V7Jjkhk,4134
+beekeeper/core/text_chunkers/token.py,sha256=xCbCpS6wr0Q1OOygsxfQkJ1dV09x7SXwwWbYcRor-rs,3901
+beekeeper/core/text_chunkers/utils.py,sha256=ImYUlU3dC44MqB1_thal6skynMhR6RF-JuPEDRREBUg,4586
+beekeeper/core/tools/__init__.py,sha256=M0WCBWkKrkVcezlMtZqYtHiuWdCiJFk681JakWAFlVA,78
+beekeeper/core/tools/base.py,sha256=A6TXn7g3DAZMREYAobfVlyOBuJn_8mIeCByc5412L9Y,948
+beekeeper/core/utils/pairwise.py,sha256=cpi8GItPFSYP4sjB5zgTFHi6JfBVWsMnNu8koA9VYQU,536
+beekeeper/core/vector_stores/__init__.py,sha256=R5SRG3YpOZqRwIfBLB8KVV6FALWqhIzIhCjRGj-bwPc,93
+beekeeper/core/vector_stores/base.py,sha256=YFW1ioZbFEcJovAh0ZCpHnj0eiXtZvqy_pj2lxPS92k,1652
+beekeeper_core-1.0.10.dist-info/METADATA,sha256=6yt3XFndeAKudQ_UsnQFZ_B_MeibOrrrIk_yt_zRV_0,1331
+beekeeper_core-1.0.10.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
+beekeeper_core-1.0.10.dist-info/RECORD,,

beekeeper_core-1.0.1.dist-info/RECORD
DELETED

@@ -1,37 +0,0 @@
-beekeeper/core/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-beekeeper/core/schema.py,sha256=8OZkRVDry6nglof38w4AvWaRDpf0zOUtqGyfWBhj8lk,267
-beekeeper/core/document/__init__.py,sha256=nCN0CNee1v-28W5a26Ca6iAVefcr_KAieAH6QfN4eyw,144
-beekeeper/core/document/base.py,sha256=S6pmOLE16ddO4Mg4EE4g1kz7fHMx4yPanhqeJlZZSdE,2619
-beekeeper/core/embeddings/__init__.py,sha256=4AzGUtoL7wComtQ-bEVwzoMQgahBhpxcF_4M5rQ0ClQ,159
-beekeeper/core/embeddings/base.py,sha256=_vhdfNLdJjYotLYNdIvbkax4XDSNyspOsE66Wyllbys,1829
-beekeeper/core/evaluation/__init__.py,sha256=FyZGpbTXcIM3BynssiS6wUm2KZkMnLVmKF50D7iqkXM,135
-beekeeper/core/evaluation/context_similarity.py,sha256=kDhVLofNjQ4QrRxT9yGPU-x0OAclAbA-qhnA8yFt078,2728
-beekeeper/core/flows/__init__.py,sha256=v6VLJ309l5bHYcG1JLUu6_kRoOwIZazonH4-n_UQzYQ,91
-beekeeper/core/flows/ingestion_flow.py,sha256=q3D-PC-Jlzz26CzU5HRsxMNRnRJyWU5i3Aj-n17BfEA,6144
-beekeeper/core/llms/__init__.py,sha256=8fPQqw9vYdQJtQlYyFF9YskFSQBHBstmqurPOyEb0rA,259
-beekeeper/core/llms/base.py,sha256=SiVt7JkjDdz6Oss7wdvn0XKWhR-Dhtel423sdsWJ9Bg,1144
-beekeeper/core/llms/decorators.py,sha256=lPfrQPFiCLAp468mVsiX6DIxSIvmrHjz3urnV9-q54U,3284
-beekeeper/core/llms/types.py,sha256=CqLsB78y6-y8actABp87nHQh3AhHcE5i4IjeUxaXyA4,722
-beekeeper/core/observers/__init__.py,sha256=O2TNVupW0UKBDnhPAoz1YB6uQXws7YdDPi-GOGw2il0,118
-beekeeper/core/observers/base.py,sha256=aCqjWtca-u3T-Ug6XxML8Ed-AfVxmVJFzE7OJjd64QI,901
-beekeeper/core/observers/types.py,sha256=s-4tB8OdeaCUIRvi6FLuib2u4Yl9evqQdCundNREXQY,217
-beekeeper/core/prompts/__init__.py,sha256=MKVxQKhA72Pm4oaVMDIRYE3fB70A2Jd2Re0UZSmFIoE,88
-beekeeper/core/prompts/base.py,sha256=m1CIPWleadHTxCpvNWO9JfYcIDoBebyv8oYSQ7JAa6s,743
-beekeeper/core/prompts/utils.py,sha256=Cqpefzzxd6DxPbOKVyUCsIs-ibBGKhYU6ppYqhPT9vM,1378
-beekeeper/core/readers/__init__.py,sha256=vPCmWmK92LYL-R0LFcPqjOKFHqxW0xUP5r6M9GNxoqY,157
-beekeeper/core/readers/base.py,sha256=WP_g-kw2MQmjcNaYa_BiuSsUnqoQzJZrze3phcZ2EWk,535
-beekeeper/core/readers/directory.py,sha256=Pgj-rb0XaSyqF2mp3S4cEqp3410xeSSPA49IDLrgWuE,2877
-beekeeper/core/text_chunkers/__init__.py,sha256=RIqqTGgn2BhbIgDGmTETw65DAQeI5Zls_A5H29jZVgA,366
-beekeeper/core/text_chunkers/base.py,sha256=RAtyMbW4huvphgF1WYO2ixxF8jpmbxQC6O-fyfuKihY,684
-beekeeper/core/text_chunkers/semantic.py,sha256=kFrELS_tCXMIovxmSbN_07TQa-hS-CbFI7GKnxyUuOM,5861
-beekeeper/core/text_chunkers/sentence.py,sha256=h1LYqtbhfIKdeFRU1dDEsqFKT-IpCKP01eFGWl0wlys,4090
-beekeeper/core/text_chunkers/token.py,sha256=7LBJMppJkn5GISGjhwppdD5EheSrZZZVY_aoWR208sg,3857
-beekeeper/core/text_chunkers/utils.py,sha256=ImYUlU3dC44MqB1_thal6skynMhR6RF-JuPEDRREBUg,4586
-beekeeper/core/tools/__init__.py,sha256=M0WCBWkKrkVcezlMtZqYtHiuWdCiJFk681JakWAFlVA,78
-beekeeper/core/tools/base.py,sha256=9O38vPqPSp8f656bcxHCGrWuYZWk6ghsVwjg36IDNF8,941
-beekeeper/core/utils/pairwise.py,sha256=cpi8GItPFSYP4sjB5zgTFHi6JfBVWsMnNu8koA9VYQU,536
-beekeeper/core/vector_stores/__init__.py,sha256=R5SRG3YpOZqRwIfBLB8KVV6FALWqhIzIhCjRGj-bwPc,93
-beekeeper/core/vector_stores/base.py,sha256=_nx_F_ySthostd4gVzRt6gwljZQfMLJLytZdRHIe0wU,1263
-beekeeper_core-1.0.1.dist-info/METADATA,sha256=HTpqTUMzHZ8MkscPSS3S3FuJhrOv7E1LfZc3TC039hw,1330
-beekeeper_core-1.0.1.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
-beekeeper_core-1.0.1.dist-info/RECORD,,

{beekeeper_core-1.0.1.dist-info → beekeeper_core-1.0.10.dist-info}/WHEEL
File without changes