sie-haystack 0.1.7__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,62 @@
1
"""SIE integration for Haystack.

Haystack components that delegate inference to a SIE server:

Dense embedders:
    - SIETextEmbedder: embeds a single text string (queries).
    - SIEDocumentEmbedder: embeds documents and stores the vectors on them.

Sparse embedders (for hybrid search):
    - SIESparseTextEmbedder: sparse embeddings for queries.
    - SIESparseDocumentEmbedder: sparse embeddings for documents.

Rankers and extractors:
    - SIERanker: reranks documents by relevance to a query.
    - SIEExtractor: extracts entities from text.

Example usage:
    from haystack import Document
    from sie_haystack import SIETextEmbedder, SIEDocumentEmbedder, SIERanker

    # Embed a query
    text_embedder = SIETextEmbedder(base_url="http://localhost:8080", model="BAAI/bge-m3")
    result = text_embedder.run(text="What is machine learning?")
    query_embedding = result["embedding"]

    # Embed documents
    doc_embedder = SIEDocumentEmbedder(base_url="http://localhost:8080", model="BAAI/bge-m3")
    docs = [Document(content="Python is a programming language.")]
    result = doc_embedder.run(documents=docs)
    embedded_docs = result["documents"]

    # Rerank documents
    ranker = SIERanker(base_url="http://localhost:8080", model="jinaai/jina-reranker-v2-base-multilingual")
    result = ranker.run(query="What is Python?", documents=embedded_docs, top_k=3)
    ranked_docs = result["documents"]

Hybrid search example:
    from sie_haystack import SIESparseTextEmbedder, SIESparseDocumentEmbedder

    # Sparse embeddings for hybrid search with Qdrant
    sparse_text_embedder = SIESparseTextEmbedder(model="BAAI/bge-m3")
    result = sparse_text_embedder.run(text="What is machine learning?")
    sparse_embedding = result["sparse_embedding"]  # {"indices": [...], "values": [...]}
"""

from sie_haystack.embedders import (
    SIEDocumentEmbedder,
    SIESparseDocumentEmbedder,
    SIESparseTextEmbedder,
    SIETextEmbedder,
)
from sie_haystack.extractors import SIEExtractor
from sie_haystack.rankers import SIERanker

# Public API, kept in alphabetical order.
__all__ = [
    "SIEDocumentEmbedder",
    "SIEExtractor",
    "SIERanker",
    "SIESparseDocumentEmbedder",
    "SIESparseTextEmbedder",
    "SIETextEmbedder",
]
@@ -0,0 +1,418 @@
1
+ """SIE embedding components for Haystack.
2
+
3
+ Provides embedder components following Haystack's conventions:
4
+ - SIETextEmbedder: For embedding single text strings (queries) - dense embeddings
5
+ - SIEDocumentEmbedder: For embedding documents - dense embeddings
6
+ - SIESparseTextEmbedder: For sparse embeddings of queries (hybrid search)
7
+ - SIESparseDocumentEmbedder: For sparse embeddings of documents (hybrid search)
8
+ """
9
+
10
+ from __future__ import annotations
11
+
12
+ from typing import Any
13
+
14
+ from haystack import Document, component
15
+
16
+
17
@component
class SIETextEmbedder:
    """Dense query embedder backed by a SIE server.

    Intended for the query side of retrieval pipelines: feed it one string
    and read the resulting dense vector from ``result["embedding"]``.

    Example:
        >>> embedder = SIETextEmbedder(base_url="http://localhost:8080", model="BAAI/bge-m3")
        >>> result = embedder.run(text="What is vector search?")
        >>> embedding = result["embedding"]  # list[float]
    """

    def __init__(
        self,
        base_url: str = "http://localhost:8080",
        model: str = "BAAI/bge-m3",
        *,
        gpu: str | None = None,
        options: dict[str, Any] | None = None,
        timeout_s: float = 180.0,
    ) -> None:
        """Store connection settings; the SDK client is created lazily.

        Args:
            base_url: URL of the SIE server.
            model: Model name to use for encoding.
            gpu: GPU type (e.g., "l4", "a100"), forwarded to the SDK as a default.
            options: Model-specific options, forwarded to the SDK as a default.
            timeout_s: Request timeout in seconds.
        """
        self._base_url = base_url
        self._model = model
        self._gpu = gpu
        self._options = options
        self._timeout_s = timeout_s
        self._client: Any = None

    @property
    def client(self) -> Any:
        """Create the SIE client on first access and cache it."""
        if self._client is not None:
            return self._client
        # Imported lazily so the package can be imported without the SDK in use.
        from sie_sdk import SIEClient

        self._client = SIEClient(
            self._base_url,
            timeout_s=self._timeout_s,
            gpu=self._gpu,
            options=self._options,
        )
        return self._client

    def warm_up(self) -> None:
        """Haystack warm-up hook: force client construction up front."""
        _ = self.client

    @component.output_types(embedding=list[float])
    def run(self, text: str) -> dict[str, list[float]]:
        """Embed one text string.

        Args:
            text: The text to embed.

        Returns:
            Dictionary with an "embedding" key holding the dense vector.
        """
        from sie_sdk.types import Item

        response = self.client.encode(
            self._model,
            Item(text=text),
            output_types=["dense"],
            options={"is_query": True},
        )
        return {"embedding": self._extract_dense(response)}

    def _extract_dense(self, result: Any) -> list[float]:
        """Pull the dense vector out of an SDK result; [] when absent."""
        # The SDK result exposes the vector under "dense" (mapping or attribute).
        if isinstance(result, dict):
            dense = result.get("dense")
        else:
            dense = getattr(result, "dense", None)
        if dense is None:
            return []
        if hasattr(dense, "tolist"):  # e.g. a numpy array
            return dense.tolist()
        return list(dense)
100
+
101
+
102
@component
class SIEDocumentEmbedder:
    """Dense document embedder backed by a SIE server.

    Embeds a batch of Haystack documents and writes the resulting dense
    vector onto each document's ``embedding`` attribute, ready for indexing.

    Example:
        >>> from haystack import Document
        >>> embedder = SIEDocumentEmbedder(base_url="http://localhost:8080", model="BAAI/bge-m3")
        >>> docs = [Document(content="Python is a programming language.")]
        >>> result = embedder.run(documents=docs)
        >>> embedded_docs = result["documents"]
        >>> print(embedded_docs[0].embedding)  # list[float]
    """

    def __init__(
        self,
        base_url: str = "http://localhost:8080",
        model: str = "BAAI/bge-m3",
        *,
        gpu: str | None = None,
        options: dict[str, Any] | None = None,
        timeout_s: float = 180.0,
        meta_fields_to_embed: list[str] | None = None,
    ) -> None:
        """Store connection settings; the SDK client is created lazily.

        Args:
            base_url: URL of the SIE server.
            model: Model name to use for encoding.
            gpu: GPU type (e.g., "l4", "a100"), forwarded to the SDK as a default.
            options: Model-specific options, forwarded to the SDK as a default.
            timeout_s: Request timeout in seconds.
            meta_fields_to_embed: Metadata fields whose values are prepended to
                the document content before embedding.
        """
        self._base_url = base_url
        self._model = model
        self._gpu = gpu
        self._options = options
        self._timeout_s = timeout_s
        self._meta_fields_to_embed = meta_fields_to_embed or []
        self._client: Any = None

    @property
    def client(self) -> Any:
        """Create the SIE client on first access and cache it."""
        if self._client is not None:
            return self._client
        # Imported lazily so the package can be imported without the SDK in use.
        from sie_sdk import SIEClient

        self._client = SIEClient(
            self._base_url,
            timeout_s=self._timeout_s,
            gpu=self._gpu,
            options=self._options,
        )
        return self._client

    def warm_up(self) -> None:
        """Haystack warm-up hook: force client construction up front."""
        _ = self.client

    @component.output_types(documents=list[Document])
    def run(self, documents: list[Document]) -> dict[str, list[Document]]:
        """Embed the documents in one batched call and attach the vectors.

        Args:
            documents: Documents to embed; their ``embedding`` attribute is
                set in place.

        Returns:
            Dictionary with a "documents" key holding the same documents,
            each carrying its embedding.
        """
        if not documents:
            return {"documents": []}

        from sie_sdk.types import Item

        # One Item per document; selected metadata may be folded into the text.
        items = [Item(text=self._build_text(doc)) for doc in documents]

        # Single batched request for all documents.
        results = self.client.encode(
            self._model,
            items,
            output_types=["dense"],
        )

        # strict=True guards against the SDK returning a mismatched count.
        for doc, result in zip(documents, results, strict=True):
            doc.embedding = self._extract_dense(result)

        return {"documents": documents}

    def _build_text(self, doc: Document) -> str:
        """Join selected metadata values and the content into one string."""
        pieces = [
            str(doc.meta[field])
            for field in self._meta_fields_to_embed
            if field in doc.meta
        ]
        pieces.append(doc.content or "")
        return " ".join(pieces)

    def _extract_dense(self, result: Any) -> list[float]:
        """Pull the dense vector out of an SDK result; [] when absent."""
        # The SDK result exposes the vector under "dense" (mapping or attribute).
        if isinstance(result, dict):
            dense = result.get("dense")
        else:
            dense = getattr(result, "dense", None)
        if dense is None:
            return []
        if hasattr(dense, "tolist"):  # e.g. a numpy array
            return dense.tolist()
        return list(dense)
211
+
212
+
213
@component
class SIESparseTextEmbedder:
    """Sparse query embedder backed by a SIE server.

    Produces sparse (indices/values) embeddings for the query side of
    hybrid-search pipelines; works with QdrantHybridRetriever and other
    hybrid retrievers.

    Example:
        >>> embedder = SIESparseTextEmbedder(base_url="http://localhost:8080", model="BAAI/bge-m3")
        >>> result = embedder.run(text="What is vector search?")
        >>> sparse_embedding = result["sparse_embedding"]  # dict with indices/values
    """

    def __init__(
        self,
        base_url: str = "http://localhost:8080",
        model: str = "BAAI/bge-m3",
        *,
        gpu: str | None = None,
        options: dict[str, Any] | None = None,
        timeout_s: float = 180.0,
    ) -> None:
        """Store connection settings; the SDK client is created lazily.

        Args:
            base_url: URL of the SIE server.
            model: Model name to use for encoding. Must support sparse
                output (e.g., BAAI/bge-m3).
            gpu: GPU type (e.g., "l4", "a100"), forwarded to the SDK as a default.
            options: Model-specific options, forwarded to the SDK as a default.
            timeout_s: Request timeout in seconds.
        """
        self._base_url = base_url
        self._model = model
        self._gpu = gpu
        self._options = options
        self._timeout_s = timeout_s
        self._client: Any = None

    @property
    def client(self) -> Any:
        """Create the SIE client on first access and cache it."""
        if self._client is not None:
            return self._client
        # Imported lazily so the package can be imported without the SDK in use.
        from sie_sdk import SIEClient

        self._client = SIEClient(
            self._base_url,
            timeout_s=self._timeout_s,
            gpu=self._gpu,
            options=self._options,
        )
        return self._client

    def warm_up(self) -> None:
        """Haystack warm-up hook: force client construction up front."""
        _ = self.client

    @component.output_types(sparse_embedding=dict)
    def run(self, text: str) -> dict[str, dict[str, list]]:
        """Embed one text string with sparse embeddings.

        Args:
            text: The text to embed.

        Returns:
            Dictionary with a "sparse_embedding" key holding a dict with
            "indices" and "values" lists.
        """
        from sie_sdk.types import Item

        response = self.client.encode(
            self._model,
            Item(text=text),
            output_types=["sparse"],
            options={"is_query": True},
        )
        return {"sparse_embedding": self._extract_sparse(response)}

    def _extract_sparse(self, result: Any) -> dict[str, list]:
        """Pull the sparse payload out of an SDK result as plain lists."""
        # The SDK result exposes the payload under "sparse" (mapping or
        # attribute) with "indices" and "values" entries.
        if isinstance(result, dict):
            sparse = result.get("sparse")
        else:
            sparse = getattr(result, "sparse", None)
        if sparse is None:
            return {"indices": [], "values": []}

        def as_list(name: str) -> list:
            if isinstance(sparse, dict):
                raw = sparse.get(name)
            else:
                raw = getattr(sparse, name, None)
            if hasattr(raw, "tolist"):  # e.g. a numpy array
                return raw.tolist()
            return list(raw or [])

        return {"indices": as_list("indices"), "values": as_list("values")}
302
+
303
+
304
@component
class SIESparseDocumentEmbedder:
    """Sparse document embedder backed by a SIE server.

    Embeds a batch of documents with sparse (indices/values) embeddings for
    hybrid-search indexing and stores each result in the document's meta
    under "_sparse_embedding". Works with
    QdrantDocumentStore(use_sparse_embeddings=True).

    Example:
        >>> from haystack import Document
        >>> embedder = SIESparseDocumentEmbedder(base_url="http://localhost:8080", model="BAAI/bge-m3")
        >>> docs = [Document(content="Python is a programming language.")]
        >>> result = embedder.run(documents=docs)
        >>> embedded_docs = result["documents"]
        >>> print(embedded_docs[0].meta["_sparse_embedding"])  # dict with indices/values
    """

    def __init__(
        self,
        base_url: str = "http://localhost:8080",
        model: str = "BAAI/bge-m3",
        *,
        gpu: str | None = None,
        options: dict[str, Any] | None = None,
        timeout_s: float = 180.0,
        meta_fields_to_embed: list[str] | None = None,
    ) -> None:
        """Store connection settings; the SDK client is created lazily.

        Args:
            base_url: URL of the SIE server.
            model: Model name to use for encoding. Must support sparse
                output (e.g., BAAI/bge-m3).
            gpu: GPU type (e.g., "l4", "a100"), forwarded to the SDK as a default.
            options: Model-specific options, forwarded to the SDK as a default.
            timeout_s: Request timeout in seconds.
            meta_fields_to_embed: Metadata fields whose values are prepended to
                the document content before embedding.
        """
        self._base_url = base_url
        self._model = model
        self._gpu = gpu
        self._options = options
        self._timeout_s = timeout_s
        self._meta_fields_to_embed = meta_fields_to_embed or []
        self._client: Any = None

    @property
    def client(self) -> Any:
        """Create the SIE client on first access and cache it."""
        if self._client is not None:
            return self._client
        # Imported lazily so the package can be imported without the SDK in use.
        from sie_sdk import SIEClient

        self._client = SIEClient(
            self._base_url,
            timeout_s=self._timeout_s,
            gpu=self._gpu,
            options=self._options,
        )
        return self._client

    def warm_up(self) -> None:
        """Haystack warm-up hook: force client construction up front."""
        _ = self.client

    @component.output_types(documents=list[Document])
    def run(self, documents: list[Document]) -> dict[str, list[Document]]:
        """Embed the documents with sparse vectors and store them in meta.

        Args:
            documents: Documents to embed; each gets a "_sparse_embedding"
                entry in its meta.

        Returns:
            Dictionary with a "documents" key holding the same documents,
            each carrying its sparse embedding.
        """
        if not documents:
            return {"documents": []}

        from sie_sdk.types import Item

        # One Item per document; selected metadata may be folded into the text.
        items = [Item(text=self._build_text(doc)) for doc in documents]

        # Single batched request with sparse output.
        results = self.client.encode(
            self._model,
            items,
            output_types=["sparse"],
        )

        # strict=True guards against the SDK returning a mismatched count.
        for doc, result in zip(documents, results, strict=True):
            doc.meta["_sparse_embedding"] = self._extract_sparse(result)

        return {"documents": documents}

    def _build_text(self, doc: Document) -> str:
        """Join selected metadata values and the content into one string."""
        pieces = [
            str(doc.meta[field])
            for field in self._meta_fields_to_embed
            if field in doc.meta
        ]
        pieces.append(doc.content or "")
        return " ".join(pieces)

    def _extract_sparse(self, result: Any) -> dict[str, list]:
        """Pull the sparse payload out of an SDK result as plain lists."""
        # The SDK result exposes the payload under "sparse" (mapping or
        # attribute) with "indices" and "values" entries.
        if isinstance(result, dict):
            sparse = result.get("sparse")
        else:
            sparse = getattr(result, "sparse", None)
        if sparse is None:
            return {"indices": [], "values": []}

        def as_list(name: str) -> list:
            if isinstance(sparse, dict):
                raw = sparse.get(name)
            else:
                raw = getattr(sparse, name, None)
            if hasattr(raw, "tolist"):  # e.g. a numpy array
                return raw.tolist()
            return list(raw or [])

        return {"indices": as_list("indices"), "values": as_list("values")}
@@ -0,0 +1,144 @@
1
+ """SIE extractor component for Haystack.
2
+
3
+ Provides SIEExtractor for extracting entities from text.
4
+ """
5
+
6
+ from __future__ import annotations
7
+
8
+ from dataclasses import dataclass
9
+ from typing import Any
10
+
11
+ from haystack import component
12
+
13
+
14
@dataclass
class Entity:
    """A single extracted entity span.

    Plain data holder: the matched surface text, the predicted label, the
    model confidence, and the character offsets into the source string.
    """

    # Surface form of the entity as it appears in the input text.
    text: str
    # Predicted entity label (e.g. "person").
    label: str
    # Model confidence score for this span.
    score: float
    # Start character offset in the input text.
    start: int
    # End character offset in the input text.
    end: int
23
+
24
+
25
@component
class SIEExtractor:
    """Entity extractor backed by a SIE server.

    Extracts named entities or custom entity types from text using GLiNER
    or similar extraction models.

    Example:
        >>> extractor = SIEExtractor(
        ...     base_url="http://localhost:8080",
        ...     model="urchade/gliner_multi-v2.1",
        ...     labels=["person", "organization", "location"],
        ... )
        >>> result = extractor.run(text="John Smith works at Google in New York.")
        >>> entities = result["entities"]
        >>> for entity in entities:
        ...     print(f"{entity.text} ({entity.label}): {entity.score:.2f}")
    """

    def __init__(
        self,
        base_url: str = "http://localhost:8080",
        model: str = "urchade/gliner_multi-v2.1",
        labels: list[str] | None = None,
        *,
        gpu: str | None = None,
        options: dict[str, Any] | None = None,
        timeout_s: float = 180.0,
    ) -> None:
        """Store connection settings; the SDK client is created lazily.

        Args:
            base_url: URL of the SIE server.
            model: Model name to use for extraction.
            labels: Entity labels to extract (e.g., ["person", "organization"]).
                Defaults to person/organization/location when omitted.
            gpu: GPU type (e.g., "l4", "a100"), forwarded to the SDK as a default.
            options: Model-specific options, forwarded to the SDK as a default.
            timeout_s: Request timeout in seconds.
        """
        self._base_url = base_url
        self._model = model
        self._labels = labels or ["person", "organization", "location"]
        self._gpu = gpu
        self._options = options
        self._timeout_s = timeout_s
        self._client: Any = None

    @property
    def client(self) -> Any:
        """Create the SIE client on first access and cache it."""
        if self._client is not None:
            return self._client
        # Imported lazily so the package can be imported without the SDK in use.
        from sie_sdk import SIEClient

        self._client = SIEClient(
            self._base_url,
            timeout_s=self._timeout_s,
            gpu=self._gpu,
            options=self._options,
        )
        return self._client

    def warm_up(self) -> None:
        """Haystack warm-up hook: force client construction up front."""
        _ = self.client

    @component.output_types(entities=list[Entity])
    def run(
        self,
        text: str,
        labels: list[str] | None = None,
    ) -> dict[str, list[Entity]]:
        """Extract entities from text.

        Args:
            text: The text to extract entities from.
            labels: Override the configured labels for this call.

        Returns:
            Dictionary with an "entities" key holding the extracted entities.
        """
        from sie_sdk.types import Item

        response = self.client.extract(
            self._model,
            Item(text=text),
            # Per-call labels win over the ones set at construction time.
            labels=self._labels if labels is None else labels,
        )

        return {"entities": self._build_entities(response)}

    def _build_entities(self, result: Any) -> list[Entity]:
        """Convert the SDK extraction result into Entity objects."""
        # Anything other than a list is treated as "no entities".
        if not isinstance(result, list):
            return []
        return [self._to_entity(item) for item in result]

    @staticmethod
    def _to_entity(item: Any) -> Entity:
        """Map one SDK entity (mapping or object) onto an Entity."""
        if isinstance(item, dict):
            read = item.get
        else:
            def read(name: str, default: Any) -> Any:
                return getattr(item, name, default)
        return Entity(
            text=read("text", ""),
            label=read("label", ""),
            score=float(read("score", 0.0)),
            start=int(read("start", 0)),
            end=int(read("end", 0)),
        )
@@ -0,0 +1,139 @@
1
+ """SIE ranker component for Haystack.
2
+
3
+ Provides SIERanker for reranking documents by relevance to a query.
4
+ """
5
+
6
+ from __future__ import annotations
7
+
8
+ from typing import Any
9
+
10
+ from haystack import Document, component
11
+
12
+
13
@component
class SIERanker:
    """Cross-encoder reranker backed by a SIE server.

    Improves retrieval precision by rescoring candidate documents against
    a query with a cross-encoder model and returning them best-first.

    Example:
        >>> from haystack import Document
        >>> ranker = SIERanker(
        ...     base_url="http://localhost:8080",
        ...     model="jinaai/jina-reranker-v2-base-multilingual",
        ...     top_k=3,
        ... )
        >>> docs = [
        ...     Document(content="Python is a programming language."),
        ...     Document(content="The weather is sunny today."),
        ...     Document(content="Machine learning uses statistical models."),
        ... ]
        >>> result = ranker.run(query="What is Python?", documents=docs)
        >>> ranked_docs = result["documents"]  # Top 3 most relevant
    """

    def __init__(
        self,
        base_url: str = "http://localhost:8080",
        model: str = "jinaai/jina-reranker-v2-base-multilingual",
        *,
        top_k: int | None = None,
        gpu: str | None = None,
        options: dict[str, Any] | None = None,
        timeout_s: float = 180.0,
    ) -> None:
        """Store connection settings; the SDK client is created lazily.

        Args:
            base_url: URL of the SIE server.
            model: Model name to use for scoring.
            top_k: Maximum number of documents to return. If None, returns all.
            gpu: GPU type (e.g., "l4", "a100"), forwarded to the SDK as a default.
            options: Model-specific options, forwarded to the SDK as a default.
            timeout_s: Request timeout in seconds.
        """
        self._base_url = base_url
        self._model = model
        self._top_k = top_k
        self._gpu = gpu
        self._options = options
        self._timeout_s = timeout_s
        self._client: Any = None

    @property
    def client(self) -> Any:
        """Create the SIE client on first access and cache it."""
        if self._client is not None:
            return self._client
        # Imported lazily so the package can be imported without the SDK in use.
        from sie_sdk import SIEClient

        self._client = SIEClient(
            self._base_url,
            timeout_s=self._timeout_s,
            gpu=self._gpu,
            options=self._options,
        )
        return self._client

    def warm_up(self) -> None:
        """Haystack warm-up hook: force client construction up front."""
        _ = self.client

    @component.output_types(documents=list[Document])
    def run(
        self,
        query: str,
        documents: list[Document],
        top_k: int | None = None,
    ) -> dict[str, list[Document]]:
        """Rerank documents by relevance to the query.

        Args:
            query: The query string to rank against.
            documents: List of documents to rerank.
            top_k: Override the configured top_k for this call.

        Returns:
            Dictionary with a "documents" key holding the reranked documents
            (copies of the inputs with "score" added to their meta).
        """
        if not documents:
            return {"documents": []}

        from sie_sdk.types import Item

        # Score every document against the query in one call.
        results = self.client.score(
            self._model,
            Item(text=query),
            [Item(text=doc.content or "") for doc in documents],
        )

        # Pair each score with a copy of the document carrying it in meta.
        # strict=True guards against the SDK returning a mismatched count.
        rescored: list[tuple[float, Document]] = []
        for doc, result in zip(documents, results, strict=True):
            score = self._extract_score(result)
            rescored.append(
                (
                    score,
                    Document(
                        id=doc.id,
                        content=doc.content,
                        meta={**doc.meta, "score": score},
                        embedding=doc.embedding,
                    ),
                )
            )

        # Best-first; key on the score only so Documents are never compared.
        rescored.sort(key=lambda pair: pair[0], reverse=True)

        ordered = [doc for _, doc in rescored]
        limit = self._top_k if top_k is None else top_k
        if limit is not None:
            ordered = ordered[:limit]

        return {"documents": ordered}

    def _extract_score(self, result: Any) -> float:
        """Read the relevance score from an SDK result (mapping or object)."""
        if isinstance(result, dict):
            return float(result.get("score", 0.0))
        return float(getattr(result, "score", 0.0))
@@ -0,0 +1,57 @@
1
+ Metadata-Version: 2.4
2
+ Name: sie-haystack
3
+ Version: 0.1.7
4
+ Summary: SIE integration for Haystack
5
+ Author-email: Superlinked <dev@superlinked.com>
6
+ License: Apache-2.0
7
+ Classifier: Development Status :: 4 - Beta
8
+ Classifier: License :: OSI Approved :: Apache Software License
9
+ Classifier: Programming Language :: Python :: 3
10
+ Classifier: Programming Language :: Python :: 3.10
11
+ Classifier: Programming Language :: Python :: 3.11
12
+ Classifier: Programming Language :: Python :: 3.12
13
+ Requires-Python: >=3.10
14
+ Requires-Dist: haystack-ai>=2.0.0
15
+ Requires-Dist: sie-sdk>=0.1.0
16
+ Provides-Extra: dev
17
+ Requires-Dist: chroma-haystack>=2.0.0; extra == 'dev'
18
+ Requires-Dist: chromadb>=0.4.0; extra == 'dev'
19
+ Requires-Dist: pytest-asyncio>=0.23.0; extra == 'dev'
20
+ Requires-Dist: pytest>=8.0.0; extra == 'dev'
21
+ Description-Content-Type: text/markdown
22
+
23
+ # sie-haystack
24
+
25
+ SIE integration for Haystack.
26
+
27
+ ## Installation
28
+
29
+ ```bash
30
+ pip install sie-haystack
31
+ ```
32
+
33
+ ## Usage
34
+
35
+ ```python
36
+ from haystack import Document
37
+ from sie_haystack import SIETextEmbedder, SIEDocumentEmbedder, SIERanker
38
+
39
+ # Embed a query
40
+ text_embedder = SIETextEmbedder(base_url="http://localhost:8080", model="BAAI/bge-m3")
41
+ result = text_embedder.run(text="What is machine learning?")
42
+ query_embedding = result["embedding"]
43
+
44
+ # Embed documents
45
+ doc_embedder = SIEDocumentEmbedder(base_url="http://localhost:8080", model="BAAI/bge-m3")
46
+ docs = [Document(content="Python is a programming language.")]
47
+ result = doc_embedder.run(documents=docs)
48
+ embedded_docs = result["documents"]
49
+
50
+ # Rerank documents
51
+ ranker = SIERanker(
52
+ base_url="http://localhost:8080",
53
+ model="jinaai/jina-reranker-v2-base-multilingual"
54
+ )
55
+ result = ranker.run(query="What is Python?", documents=embedded_docs, top_k=3)
56
+ ranked_docs = result["documents"]
57
+ ```
@@ -0,0 +1,7 @@
1
+ sie_haystack/__init__.py,sha256=wFzY72JlWncGhYsOLI3QQjRiDZt_o2XN_LbMDYCOZfo,2188
2
+ sie_haystack/embedders.py,sha256=LxoPZggHVs1VLSMWN5uRYqjXAR-HbhqRiA9KrUDnsRg,14941
3
+ sie_haystack/extractors.py,sha256=-nLhiD0pXBexUVlS3BpmWntJ90XUTnY3Ip2ymU1adTU,4424
4
+ sie_haystack/rankers.py,sha256=6TBEkw4apUqkct-jHCZGNtQ1N3inLLQpk0PfVO-FdqQ,4544
5
+ sie_haystack-0.1.7.dist-info/METADATA,sha256=ZgQeGO0GMuezRIWyeu3saa2nIGs5v0PYLOAKFvrxQ4k,1747
6
+ sie_haystack-0.1.7.dist-info/WHEEL,sha256=QccIxa26bgl1E6uMy58deGWi-0aeIkkangHcxk2kWfw,87
7
+ sie_haystack-0.1.7.dist-info/RECORD,,
@@ -0,0 +1,4 @@
1
+ Wheel-Version: 1.0
2
+ Generator: hatchling 1.29.0
3
+ Root-Is-Purelib: true
4
+ Tag: py3-none-any