haystack-experimental 0.14.1__tar.gz → 0.14.3__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (56)
  1. {haystack_experimental-0.14.1 → haystack_experimental-0.14.3}/PKG-INFO +1 -1
  2. {haystack_experimental-0.14.1 → haystack_experimental-0.14.3}/haystack_experimental/components/agents/agent.py +74 -15
  3. {haystack_experimental-0.14.1 → haystack_experimental-0.14.3}/haystack_experimental/components/preprocessors/__init__.py +2 -0
  4. {haystack_experimental-0.14.1 → haystack_experimental-0.14.3}/haystack_experimental/components/preprocessors/md_header_level_inferrer.py +2 -2
  5. {haystack_experimental-0.14.1 → haystack_experimental-0.14.3}/haystack_experimental/components/query/query_expander.py +14 -19
  6. {haystack_experimental-0.14.1 → haystack_experimental-0.14.3}/haystack_experimental/components/retrievers/multi_query_embedding_retriever.py +8 -15
  7. {haystack_experimental-0.14.1 → haystack_experimental-0.14.3}/haystack_experimental/components/retrievers/multi_query_text_retriever.py +3 -11
  8. {haystack_experimental-0.14.1 → haystack_experimental-0.14.3}/haystack_experimental/core/pipeline/breakpoint.py +5 -3
  9. {haystack_experimental-0.14.1 → haystack_experimental-0.14.3}/.gitignore +0 -0
  10. {haystack_experimental-0.14.1 → haystack_experimental-0.14.3}/LICENSE +0 -0
  11. {haystack_experimental-0.14.1 → haystack_experimental-0.14.3}/LICENSE-MIT.txt +0 -0
  12. {haystack_experimental-0.14.1 → haystack_experimental-0.14.3}/README.md +0 -0
  13. {haystack_experimental-0.14.1 → haystack_experimental-0.14.3}/haystack_experimental/__init__.py +0 -0
  14. {haystack_experimental-0.14.1 → haystack_experimental-0.14.3}/haystack_experimental/chat_message_stores/__init__.py +0 -0
  15. {haystack_experimental-0.14.1 → haystack_experimental-0.14.3}/haystack_experimental/chat_message_stores/in_memory.py +0 -0
  16. {haystack_experimental-0.14.1 → haystack_experimental-0.14.3}/haystack_experimental/chat_message_stores/types.py +0 -0
  17. {haystack_experimental-0.14.1 → haystack_experimental-0.14.3}/haystack_experimental/components/__init__.py +0 -0
  18. {haystack_experimental-0.14.1 → haystack_experimental-0.14.3}/haystack_experimental/components/agents/__init__.py +0 -0
  19. {haystack_experimental-0.14.1 → haystack_experimental-0.14.3}/haystack_experimental/components/agents/human_in_the_loop/__init__.py +0 -0
  20. {haystack_experimental-0.14.1 → haystack_experimental-0.14.3}/haystack_experimental/components/agents/human_in_the_loop/breakpoint.py +0 -0
  21. {haystack_experimental-0.14.1 → haystack_experimental-0.14.3}/haystack_experimental/components/agents/human_in_the_loop/dataclasses.py +0 -0
  22. {haystack_experimental-0.14.1 → haystack_experimental-0.14.3}/haystack_experimental/components/agents/human_in_the_loop/errors.py +0 -0
  23. {haystack_experimental-0.14.1 → haystack_experimental-0.14.3}/haystack_experimental/components/agents/human_in_the_loop/policies.py +0 -0
  24. {haystack_experimental-0.14.1 → haystack_experimental-0.14.3}/haystack_experimental/components/agents/human_in_the_loop/strategies.py +0 -0
  25. {haystack_experimental-0.14.1 → haystack_experimental-0.14.3}/haystack_experimental/components/agents/human_in_the_loop/types.py +0 -0
  26. {haystack_experimental-0.14.1 → haystack_experimental-0.14.3}/haystack_experimental/components/agents/human_in_the_loop/user_interfaces.py +0 -0
  27. {haystack_experimental-0.14.1 → haystack_experimental-0.14.3}/haystack_experimental/components/embedders/__init__.py +0 -0
  28. {haystack_experimental-0.14.1 → haystack_experimental-0.14.3}/haystack_experimental/components/embedders/types/__init__.py +0 -0
  29. {haystack_experimental-0.14.1 → haystack_experimental-0.14.3}/haystack_experimental/components/embedders/types/protocol.py +0 -0
  30. {haystack_experimental-0.14.1 → haystack_experimental-0.14.3}/haystack_experimental/components/generators/__init__.py +0 -0
  31. {haystack_experimental-0.14.1 → haystack_experimental-0.14.3}/haystack_experimental/components/generators/chat/__init__.py +0 -0
  32. {haystack_experimental-0.14.1 → haystack_experimental-0.14.3}/haystack_experimental/components/generators/chat/openai.py +0 -0
  33. {haystack_experimental-0.14.1 → haystack_experimental-0.14.3}/haystack_experimental/components/preprocessors/embedding_based_document_splitter.py +0 -0
  34. {haystack_experimental-0.14.1 → haystack_experimental-0.14.3}/haystack_experimental/components/query/__init__.py +0 -0
  35. {haystack_experimental-0.14.1 → haystack_experimental-0.14.3}/haystack_experimental/components/retrievers/__init__.py +0 -0
  36. {haystack_experimental-0.14.1 → haystack_experimental-0.14.3}/haystack_experimental/components/retrievers/chat_message_retriever.py +0 -0
  37. {haystack_experimental-0.14.1 → haystack_experimental-0.14.3}/haystack_experimental/components/retrievers/types/__init__.py +0 -0
  38. {haystack_experimental-0.14.1 → haystack_experimental-0.14.3}/haystack_experimental/components/retrievers/types/protocol.py +0 -0
  39. {haystack_experimental-0.14.1 → haystack_experimental-0.14.3}/haystack_experimental/components/summarizers/__init__.py +0 -0
  40. {haystack_experimental-0.14.1 → haystack_experimental-0.14.3}/haystack_experimental/components/summarizers/llm_summarizer.py +0 -0
  41. {haystack_experimental-0.14.1 → haystack_experimental-0.14.3}/haystack_experimental/components/writers/__init__.py +0 -0
  42. {haystack_experimental-0.14.1 → haystack_experimental-0.14.3}/haystack_experimental/components/writers/chat_message_writer.py +0 -0
  43. {haystack_experimental-0.14.1 → haystack_experimental-0.14.3}/haystack_experimental/core/__init__.py +0 -0
  44. {haystack_experimental-0.14.1 → haystack_experimental-0.14.3}/haystack_experimental/core/pipeline/__init__.py +0 -0
  45. {haystack_experimental-0.14.1 → haystack_experimental-0.14.3}/haystack_experimental/dataclasses/__init__.py +0 -0
  46. {haystack_experimental-0.14.1 → haystack_experimental-0.14.3}/haystack_experimental/dataclasses/breakpoints.py +0 -0
  47. {haystack_experimental-0.14.1 → haystack_experimental-0.14.3}/haystack_experimental/super_components/__init__.py +0 -0
  48. {haystack_experimental-0.14.1 → haystack_experimental-0.14.3}/haystack_experimental/super_components/indexers/__init__.py +0 -0
  49. {haystack_experimental-0.14.1 → haystack_experimental-0.14.3}/haystack_experimental/super_components/indexers/sentence_transformers_document_indexer.py +0 -0
  50. {haystack_experimental-0.14.1 → haystack_experimental-0.14.3}/haystack_experimental/utils/__init__.py +0 -0
  51. {haystack_experimental-0.14.1 → haystack_experimental-0.14.3}/haystack_experimental/utils/hallucination_risk_calculator/__init__.py +0 -0
  52. {haystack_experimental-0.14.1 → haystack_experimental-0.14.3}/haystack_experimental/utils/hallucination_risk_calculator/core_math.py +0 -0
  53. {haystack_experimental-0.14.1 → haystack_experimental-0.14.3}/haystack_experimental/utils/hallucination_risk_calculator/dataclasses.py +0 -0
  54. {haystack_experimental-0.14.1 → haystack_experimental-0.14.3}/haystack_experimental/utils/hallucination_risk_calculator/openai_planner.py +0 -0
  55. {haystack_experimental-0.14.1 → haystack_experimental-0.14.3}/haystack_experimental/utils/hallucination_risk_calculator/skeletonization.py +0 -0
  56. {haystack_experimental-0.14.1 → haystack_experimental-0.14.3}/pyproject.toml +0 -0
PKG-INFO

@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: haystack-experimental
- Version: 0.14.1
+ Version: 0.14.3
  Summary: Experimental components and features for the Haystack LLM framework.
  Project-URL: CI: GitHub, https://github.com/deepset-ai/haystack-experimental/actions
  Project-URL: GitHub: issues, https://github.com/deepset-ai/haystack-experimental/issues
haystack_experimental/components/agents/agent.py

@@ -5,6 +5,7 @@
  # pylint: disable=wrong-import-order,wrong-import-position,ungrouped-imports
  # ruff: noqa: I001

+ import inspect
  from dataclasses import dataclass
  from typing import Any, Optional, Union

@@ -171,6 +172,7 @@ class Agent(HaystackAgent):
  requires_async: bool,
  *,
  system_prompt: Optional[str] = None,
+ generation_kwargs: Optional[dict[str, Any]] = None,
  tools: Optional[Union[ToolsType, list[str]]] = None,
  **kwargs: dict[str, Any],
  ) -> _ExecutionContext:
@@ -185,14 +187,28 @@ class Agent(HaystackAgent):
  When passing tool names, tools are selected from the Agent's originally configured tools.
  :param kwargs: Additional data to pass to the State used by the Agent.
  """
- exe_context = super(Agent, self)._initialize_fresh_execution(
- messages=messages,
- streaming_callback=streaming_callback,
- requires_async=requires_async,
- system_prompt=system_prompt,
- tools=tools,
- **kwargs,
- )
+ # The PR https://github.com/deepset-ai/haystack/pull/9616 added the generation_kwargs parameter to
+ # _initialize_fresh_execution. This change has been released in Haystack 2.20.0.
+ # To maintain compatibility with Haystack 2.19 we check the number of parameters and call accordingly.
+ if inspect.signature(super(Agent, self)._initialize_fresh_execution).parameters.get("generation_kwargs"):
+ exe_context = super(Agent, self)._initialize_fresh_execution(
+ messages=messages,
+ streaming_callback=streaming_callback,
+ requires_async=requires_async,
+ system_prompt=system_prompt,
+ generation_kwargs=generation_kwargs,
+ tools=tools,
+ **kwargs,
+ )
+ else:
+ exe_context = super(Agent, self)._initialize_fresh_execution(
+ messages=messages,
+ streaming_callback=streaming_callback,
+ requires_async=requires_async,
+ system_prompt=system_prompt,
+ tools=tools,
+ **kwargs,
+ )
  # NOTE: 1st difference with parent method to add this to tool_invoker_inputs
  if self._tool_invoker:
  exe_context.tool_invoker_inputs["enable_streaming_callback_passthrough"] = (
@@ -212,6 +228,7 @@ class Agent(HaystackAgent):
  streaming_callback: Optional[StreamingCallbackT],
  requires_async: bool,
  *,
+ generation_kwargs: Optional[dict[str, Any]] = None,
  tools: Optional[Union[ToolsType, list[str]]] = None,
  ) -> _ExecutionContext:
  """
@@ -220,12 +237,26 @@ class Agent(HaystackAgent):
  :param snapshot: An AgentSnapshot containing the state of a previously saved agent execution.
  :param streaming_callback: Optional callback for streaming responses.
  :param requires_async: Whether the agent run requires asynchronous execution.
+ :param generation_kwargs: Additional keyword arguments for chat generator. These parameters will
+ override the parameters passed during component initialization.
  :param tools: Optional list of Tool objects, a Toolset, or list of tool names to use for this run.
  When passing tool names, tools are selected from the Agent's originally configured tools.
  """
- exe_context = super(Agent, self)._initialize_from_snapshot(
- snapshot=snapshot, streaming_callback=streaming_callback, requires_async=requires_async, tools=tools
- )
+ # The PR https://github.com/deepset-ai/haystack/pull/9616 added the generation_kwargs parameter to
+ # _initialize_from_snapshot. This change has been released in Haystack 2.20.0.
+ # To maintain compatibility with Haystack 2.19 we check the number of parameters and call accordingly.
+ if inspect.signature(super(Agent, self)._initialize_from_snapshot).parameters.get("generation_kwargs"):
+ exe_context = super(Agent, self)._initialize_from_snapshot(
+ snapshot=snapshot,
+ streaming_callback=streaming_callback,
+ requires_async=requires_async,
+ generation_kwargs=generation_kwargs,
+ tools=tools,
+ )
+ else:
+ exe_context = super(Agent, self)._initialize_from_snapshot(
+ snapshot=snapshot, streaming_callback=streaming_callback, requires_async=requires_async, tools=tools
+ )
  # NOTE: 1st difference with parent method to add this to tool_invoker_inputs
  if self._tool_invoker:
  exe_context.tool_invoker_inputs["enable_streaming_callback_passthrough"] = (
@@ -247,6 +278,7 @@ class Agent(HaystackAgent):
  messages: list[ChatMessage],
  streaming_callback: Optional[StreamingCallbackT] = None,
  *,
+ generation_kwargs: Optional[dict[str, Any]] = None,
  break_point: Optional[AgentBreakpoint] = None,
  snapshot: Optional[AgentSnapshot] = None, # type: ignore[override]
  system_prompt: Optional[str] = None,
@@ -259,6 +291,8 @@ class Agent(HaystackAgent):
  :param messages: List of Haystack ChatMessage objects to process.
  :param streaming_callback: A callback that will be invoked when a response is streamed from the LLM.
  The same callback can be configured to emit tool results when a tool is called.
+ :param generation_kwargs: Additional keyword arguments for LLM. These parameters will
+ override the parameters passed during component initialization.
  :param break_point: An AgentBreakpoint, can be a Breakpoint for the "chat_generator" or a ToolBreakpoint
  for "tool_invoker".
  :param snapshot: A dictionary containing a snapshot of a previously saved agent execution. The snapshot contains
@@ -285,11 +319,21 @@ class Agent(HaystackAgent):
  "snapshot": snapshot,
  **kwargs,
  }
- self._runtime_checks(break_point=break_point, snapshot=snapshot)
+ # The PR https://github.com/deepset-ai/haystack/pull/9987 removed the unused snapshot parameter from
+ # _runtime_checks. This change will be released in Haystack 2.20.0.
+ # To maintain compatibility with Haystack 2.19 we check the number of parameters and call accordingly.
+ if len(inspect.signature(self._runtime_checks).parameters) == 2:
+ self._runtime_checks(break_point, snapshot) # type: ignore[call-arg] # pylint: disable=too-many-function-args
+ else:
+ self._runtime_checks(break_point) # type: ignore[call-arg] # pylint: disable=no-value-for-parameter

  if snapshot:
  exe_context = self._initialize_from_snapshot(
- snapshot=snapshot, streaming_callback=streaming_callback, requires_async=False, tools=tools
+ snapshot=snapshot,
+ streaming_callback=streaming_callback,
+ requires_async=False,
+ generation_kwargs=generation_kwargs,
+ tools=tools,
  )
  else:
  exe_context = self._initialize_fresh_execution(
@@ -297,6 +341,7 @@ class Agent(HaystackAgent):
  streaming_callback=streaming_callback,
  requires_async=False,
  system_prompt=system_prompt,
+ generation_kwargs=generation_kwargs,
  tools=tools,
  **kwargs,
  )
@@ -431,6 +476,7 @@ class Agent(HaystackAgent):
  messages: list[ChatMessage],
  streaming_callback: Optional[StreamingCallbackT] = None,
  *,
+ generation_kwargs: Optional[dict[str, Any]] = None,
  break_point: Optional[AgentBreakpoint] = None,
  snapshot: Optional[AgentSnapshot] = None, # type: ignore[override]
  system_prompt: Optional[str] = None,
@@ -447,6 +493,8 @@ class Agent(HaystackAgent):
  :param messages: List of Haystack ChatMessage objects to process.
  :param streaming_callback: An asynchronous callback that will be invoked when a response is streamed from the
  LLM. The same callback can be configured to emit tool results when a tool is called.
+ :param generation_kwargs: Additional keyword arguments for LLM. These parameters will
+ override the parameters passed during component initialization.
  :param break_point: An AgentBreakpoint, can be a Breakpoint for the "chat_generator" or a ToolBreakpoint
  for "tool_invoker".
  :param snapshot: A dictionary containing a snapshot of a previously saved agent execution. The snapshot contains
@@ -472,11 +520,21 @@ class Agent(HaystackAgent):
  "snapshot": snapshot,
  **kwargs,
  }
- self._runtime_checks(break_point=break_point, snapshot=snapshot)
+ # The PR https://github.com/deepset-ai/haystack/pull/9987 removed the unused snapshot parameter from
+ # _runtime_checks. This change will be released in Haystack 2.20.0.
+ # To maintain compatibility with Haystack 2.19 we check the number of parameters and call accordingly.
+ if len(inspect.signature(self._runtime_checks).parameters) == 2:
+ self._runtime_checks(break_point, snapshot) # type: ignore[call-arg] # pylint: disable=too-many-function-args
+ else:
+ self._runtime_checks(break_point) # type: ignore[call-arg] # pylint: disable=no-value-for-parameter

  if snapshot:
  exe_context = self._initialize_from_snapshot(
- snapshot=snapshot, streaming_callback=streaming_callback, requires_async=True, tools=tools
+ snapshot=snapshot,
+ streaming_callback=streaming_callback,
+ requires_async=True,
+ generation_kwargs=generation_kwargs,
+ tools=tools,
  )
  else:
  exe_context = self._initialize_fresh_execution(
@@ -484,6 +542,7 @@ class Agent(HaystackAgent):
  streaming_callback=streaming_callback,
  requires_async=True,
  system_prompt=system_prompt,
+ generation_kwargs=generation_kwargs,
  tools=tools,
  **kwargs,
  )
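
The recurring pattern in the agent.py hunks above is signature probing: the subclass inspects the parent method with `inspect.signature` and only forwards `generation_kwargs` (or the extra `snapshot` argument) when the installed Haystack version accepts it. Below is a minimal, self-contained sketch of that pattern; `Base`, `Child`, and `extra` are hypothetical names invented for the example, not Haystack APIs.

```python
# Illustrative sketch (not part of the package): forward a keyword argument to a
# parent method only when the parent's signature accepts it.
import inspect


class Base:
    # Plays the role of an older parent class without the new parameter.
    def _setup(self, messages):
        return {"messages": messages}


class Child(Base):
    def _setup(self, messages, *, extra=None):
        parent = super()._setup
        # Probe the parent's signature and pass `extra` only if it is supported.
        if "extra" in inspect.signature(parent).parameters:
            return parent(messages, extra=extra)
        return parent(messages)


print(Child()._setup(["hi"], extra={"temperature": 0.0}))  # falls back cleanly
```

The hunks above check `.parameters.get("generation_kwargs")` for truthiness, which amounts to the same membership test as the `in` check used here.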
haystack_experimental/components/preprocessors/__init__.py

@@ -9,10 +9,12 @@ from lazy_imports import LazyImporter

  _import_structure = {
  "embedding_based_document_splitter": ["EmbeddingBasedDocumentSplitter"],
+ "md_header_level_inferrer": ["MarkdownHeaderLevelInferrer"],
  }

  if TYPE_CHECKING:
  from .embedding_based_document_splitter import EmbeddingBasedDocumentSplitter
+ from .md_header_level_inferrer import MarkdownHeaderLevelInferrer

  else:
  sys.modules[__name__] = LazyImporter(name=__name__, module_file=__file__, import_structure=_import_structure)
haystack_experimental/components/preprocessors/md_header_level_inferrer.py

@@ -24,7 +24,7 @@ class MarkdownHeaderLevelInferrer:
  from haystack_experimental.components.preprocessors import MarkdownHeaderLevelInferrer

  # Create a document with uniform header levels
- text = "## Title\nSome content\n## Section\nMore content\n## Subsection\nFinal content"
+ text = "## Title\n## Subheader\nSection\n## Subheader\nMore Content"
  doc = Document(content=text)

  # Initialize the inferrer and process the document
@@ -33,7 +33,7 @@ class MarkdownHeaderLevelInferrer:

  # The headers are now normalized with proper hierarchy
  print(result["documents"][0].content)
- > # Title\nSome content\n## Section\nMore content\n### Subsection\nFinal content
+ > # Title\n## Subheader\nSection\n## Subheader\nMore Content
  ```
  """

haystack_experimental/components/query/query_expander.py

@@ -3,7 +3,7 @@
  # SPDX-License-Identifier: Apache-2.0

  import json
- from typing import Any, Dict, List, Optional
+ from typing import Any, Optional

  from haystack import default_from_dict, default_to_dict, logging
  from haystack.components.builders.prompt_builder import PromptBuilder
@@ -91,7 +91,7 @@ class QueryExpander:
  prompt_template: Optional[str] = None,
  n_expansions: int = 4,
  include_original_query: bool = True,
- ):
+ ) -> None:
  """
  Initialize the QueryExpander component.

@@ -99,7 +99,7 @@ class QueryExpander:
  If None, a default OpenAIChatGenerator with gpt-4.1-mini model is used.
  :param prompt_template: Custom [PromptBuilder](https://docs.haystack.deepset.ai/docs/promptbuilder)
  template for query expansion. The template should instruct the LLM to return a JSON response with the
- structure: {"queries": ["query1", "query2", "query3"]}. The template should include 'query' and
+ structure: `{"queries": ["query1", "query2", "query3"]}`. The template should include 'query' and
  'n_expansions' variables.
  :param n_expansions: Number of alternative queries to generate (default: 4).
  :param include_original_query: Whether to include the original query in the output.
@@ -134,7 +134,6 @@ class QueryExpander:
  self.chat_generator = chat_generator

  self._is_warmed_up = False
- self._supports_warm_up = hasattr(self.chat_generator, "warm_up")
  self.prompt_template = prompt_template or DEFAULT_PROMPT_TEMPLATE

  # Check if required variables are present in the template
@@ -153,7 +152,7 @@ class QueryExpander:
  required_variables=["n_expansions", "query"],
  )

- def to_dict(self) -> Dict[str, Any]:
+ def to_dict(self) -> dict[str, Any]:
  """
  Serializes the component to a dictionary.

@@ -168,7 +167,7 @@ class QueryExpander:
  )

  @classmethod
- def from_dict(cls, data: Dict[str, Any]) -> "QueryExpander":
+ def from_dict(cls, data: dict[str, Any]) -> "QueryExpander":
  """
  Deserializes the component from a dictionary.

@@ -181,12 +180,8 @@ class QueryExpander:

  return default_from_dict(cls, data)

- @component.output_types(queries=List[str])
- def run(
- self,
- query: str,
- n_expansions: Optional[int] = None,
- ) -> Dict[str, List[str]]:
+ @component.output_types(queries=list[str])
+ def run(self, query: str, n_expansions: Optional[int] = None) -> dict[str, list[str]]:
  """
  Expand the input query into multiple semantically similar queries.

@@ -199,11 +194,10 @@ class QueryExpander:
  If include_original_query=True, the original query will be included in addition
  to the n_expansions alternative queries.
  :raises ValueError: If n_expansions is not positive (less than or equal to 0).
- :raises RuntimeError: If the component is not warmed up and the chat generator does not support warm up.
  """

- if not self._is_warmed_up and self._supports_warm_up:
- raise RuntimeError("The component is not warmed up. Please call the `warm_up` method first.")
+ if not self._is_warmed_up:
+ self.warm_up()

  response = {"queries": [query] if self.include_original_query else []}

@@ -252,14 +246,15 @@

  def warm_up(self):
  """
- Warm up the underlying LLM if it supports it.
+ Warm up the LLM provider component.
  """
- if not self._is_warmed_up and self._supports_warm_up:
- self.chat_generator.warm_up() # type: ignore[attr-defined]
+ if not self._is_warmed_up:
+ if hasattr(self.chat_generator, "warm_up"):
+ self.chat_generator.warm_up()
  self._is_warmed_up = True

  @staticmethod
- def _parse_expanded_queries(generator_response: str) -> List[str]:
+ def _parse_expanded_queries(generator_response: str) -> list[str]:
  """
  Parse the generator response to extract individual expanded queries.

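The query_expander.py change drops the explicit "not warmed up" error in favor of warming up lazily on the first `run()` call, probing the wrapped generator with `hasattr` since not every chat generator exposes `warm_up()`. A standalone sketch of that pattern follows; `FakeGenerator` and `LazyComponent` are invented names for illustration.

```python
# Illustrative sketch (not from the package): lazy warm-up on first use.
class FakeGenerator:
    def warm_up(self):
        print("loading model ...")


class LazyComponent:
    def __init__(self, generator):
        self.generator = generator
        self._is_warmed_up = False

    def warm_up(self):
        if not self._is_warmed_up:
            # Only some generators expose warm_up(), so probe before calling.
            if hasattr(self.generator, "warm_up"):
                self.generator.warm_up()
            self._is_warmed_up = True

    def run(self, query: str):
        if not self._is_warmed_up:
            self.warm_up()  # first call pays the cost; later calls skip it
        return {"queries": [query]}


expander = LazyComponent(FakeGenerator())
print(expander.run("solar energy"))  # warms up, then runs
print(expander.run("wind energy"))   # already warm, runs directly
```
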
haystack_experimental/components/retrievers/multi_query_embedding_retriever.py

@@ -3,7 +3,7 @@
  # SPDX-License-Identifier: Apache-2.0

  from concurrent.futures import ThreadPoolExecutor
- from typing import Any, List, Optional
+ from typing import Any, Optional

  from haystack import Document, component, default_from_dict, default_to_dict
  from haystack.components.embedders.types.protocol import TextEmbedder
@@ -74,13 +74,7 @@ class MultiQueryEmbeddingRetriever:
  ```
  """ # noqa E501

- def __init__(
- self,
- *,
- retriever: EmbeddingRetriever,
- query_embedder: TextEmbedder,
- max_workers: int = 3,
- ):
+ def __init__(self, *, retriever: EmbeddingRetriever, query_embedder: TextEmbedder, max_workers: int = 3) -> None:
  """
  Initialize MultiQueryEmbeddingRetriever.

@@ -104,12 +98,8 @@ class MultiQueryEmbeddingRetriever:
  self.retriever.warm_up()
  self._is_warmed_up = True

- @component.output_types(documents=List[Document])
- def run(
- self,
- queries: List[str],
- retriever_kwargs: Optional[dict[str, Any]] = None,
- ) -> dict[str, Any]:
+ @component.output_types(documents=list[Document])
+ def run(self, queries: list[str], retriever_kwargs: Optional[dict[str, Any]] = None) -> dict[str, list[Document]]:
  """
  Retrieve documents using multiple queries in parallel.

@@ -123,6 +113,9 @@ class MultiQueryEmbeddingRetriever:
  seen_contents = set()
  retriever_kwargs = retriever_kwargs or {}

+ if not self._is_warmed_up:
+ self.warm_up()
+
  with ThreadPoolExecutor(max_workers=self.max_workers) as executor:
  queries_results = executor.map(lambda query: self._run_on_thread(query, retriever_kwargs), queries)
  for result in queries_results:
@@ -137,7 +130,7 @@ class MultiQueryEmbeddingRetriever:
  docs.sort(key=lambda x: x.score or 0.0, reverse=True)
  return {"documents": docs}

- def _run_on_thread(self, query: str, retriever_kwargs: Optional[dict[str, Any]] = None) -> Optional[List[Document]]:
+ def _run_on_thread(self, query: str, retriever_kwargs: Optional[dict[str, Any]] = None) -> Optional[list[Document]]:
  """
  Process a single query on a separate thread.

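Beyond the type-hint cleanup and lazy warm-up, the `run()` method shown above is a fan-out/merge: each query is retrieved on a thread pool, results are deduplicated by content, and the merged list is sorted by score. A self-contained sketch of that flow, assuming a stand-in `fake_retrieve` function and plain dicts in place of the real retriever and Document objects:

```python
# Illustrative sketch (not from the package): fan out one retrieval per query on
# a thread pool, deduplicate by content, and sort by score.
from concurrent.futures import ThreadPoolExecutor


def fake_retrieve(query: str) -> list[dict]:
    # Stand-in for retriever.run(query=...)["documents"].
    return [
        {"content": f"doc about {query}", "score": 0.5},
        {"content": "shared doc", "score": 0.9},
    ]


def run_multi_query(queries: list[str], max_workers: int = 3) -> list[dict]:
    docs: list[dict] = []
    seen_contents: set[str] = set()
    with ThreadPoolExecutor(max_workers=max_workers) as executor:
        for result in executor.map(fake_retrieve, queries):
            for doc in result:
                if doc["content"] not in seen_contents:
                    docs.append(doc)
                    seen_contents.add(doc["content"])
    # Highest-scoring documents first, treating missing scores as 0.0.
    docs.sort(key=lambda d: d["score"] or 0.0, reverse=True)
    return docs


print(run_multi_query(["solar power", "wind power"]))
```
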
haystack_experimental/components/retrievers/multi_query_text_retriever.py

@@ -3,7 +3,7 @@
  # SPDX-License-Identifier: Apache-2.0

  from concurrent.futures import ThreadPoolExecutor
- from typing import Any, List, Optional
+ from typing import Any, Optional

  from haystack import Document, component, default_from_dict, default_to_dict
  from haystack.core.serialization import component_to_dict
@@ -57,11 +57,7 @@ class MultiQueryTextRetriever:
  ```
  """ # noqa E501

- def __init__(
- self,
- retriever: TextRetriever,
- max_workers: int = 3,
- ):
+ def __init__(self, *, retriever: TextRetriever, max_workers: int = 3) -> None:
  """
  Initialize MultiQueryTextRetriever.

@@ -82,11 +78,7 @@ class MultiQueryTextRetriever:
  self._is_warmed_up = True

  @component.output_types(documents=list[Document])
- def run(
- self,
- queries: List[str],
- retriever_kwargs: Optional[dict[str, Any]] = None,
- ) -> dict[str, Any]:
+ def run(self, queries: list[str], retriever_kwargs: Optional[dict[str, Any]] = None) -> dict[str, list[Document]]:
  """
  Retrieve documents using multiple queries in parallel.

haystack_experimental/core/pipeline/breakpoint.py

@@ -2,12 +2,12 @@
  #
  # SPDX-License-Identifier: Apache-2.0

- from copy import deepcopy
  from dataclasses import replace
  from datetime import datetime
  from typing import TYPE_CHECKING, Any, Optional

  from haystack import logging
+ from haystack.core.pipeline.utils import _deepcopy_with_exceptions
  from haystack.dataclasses.breakpoints import AgentBreakpoint, PipelineSnapshot, PipelineState, ToolBreakpoint
  from haystack.utils.base_serialization import _serialize_value_with_schema
  from haystack.utils.misc import _get_output_dir
@@ -44,8 +44,10 @@ def _create_agent_snapshot(
  """
  return AgentSnapshot(
  component_inputs={
- "chat_generator": _serialize_value_with_schema(deepcopy(component_inputs["chat_generator"])),
- "tool_invoker": _serialize_value_with_schema(deepcopy(component_inputs["tool_invoker"])),
+ "chat_generator": _serialize_value_with_schema(
+ _deepcopy_with_exceptions(component_inputs["chat_generator"])
+ ),
+ "tool_invoker": _serialize_value_with_schema(_deepcopy_with_exceptions(component_inputs["tool_invoker"])),
  },
  component_visits=component_visits,
  break_point=agent_breakpoint,
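
The breakpoint.py change swaps `copy.deepcopy` for Haystack's `_deepcopy_with_exceptions` when snapshotting component inputs, presumably because some inputs (clients, locks, open handles) cannot be deep-copied. The sketch below illustrates the general idea with a simplified, hypothetical `deepcopy_or_fallback` helper; the real helper's behavior may differ.

```python
# Illustrative sketch (not the Haystack implementation): a deep copy that falls
# back to the original object when copying fails.
from copy import deepcopy
from threading import Lock


def deepcopy_or_fallback(value):
    try:
        return deepcopy(value)
    except Exception:
        # Keep a reference to the original object when it refuses to be copied.
        return value


inputs = {"messages": ["hello"], "client_lock": Lock()}
snapshot = {key: deepcopy_or_fallback(value) for key, value in inputs.items()}
print(snapshot["messages"] is not inputs["messages"])    # True: list was copied
print(snapshot["client_lock"] is inputs["client_lock"])  # True: lock kept as-is
```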