haystack-experimental 0.13.0__py3-none-any.whl → 0.14.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (25) hide show
  1. haystack_experimental/components/agents/__init__.py +16 -0
  2. haystack_experimental/components/agents/agent.py +633 -0
  3. haystack_experimental/components/agents/human_in_the_loop/__init__.py +35 -0
  4. haystack_experimental/components/agents/human_in_the_loop/breakpoint.py +63 -0
  5. haystack_experimental/components/agents/human_in_the_loop/dataclasses.py +72 -0
  6. haystack_experimental/components/agents/human_in_the_loop/errors.py +28 -0
  7. haystack_experimental/components/agents/human_in_the_loop/policies.py +78 -0
  8. haystack_experimental/components/agents/human_in_the_loop/strategies.py +455 -0
  9. haystack_experimental/components/agents/human_in_the_loop/types.py +89 -0
  10. haystack_experimental/components/agents/human_in_the_loop/user_interfaces.py +209 -0
  11. haystack_experimental/components/generators/chat/openai.py +8 -10
  12. haystack_experimental/components/preprocessors/embedding_based_document_splitter.py +18 -6
  13. haystack_experimental/components/preprocessors/md_header_level_inferrer.py +146 -0
  14. haystack_experimental/components/summarizers/__init__.py +7 -0
  15. haystack_experimental/components/summarizers/llm_summarizer.py +317 -0
  16. haystack_experimental/core/__init__.py +3 -0
  17. haystack_experimental/core/pipeline/__init__.py +3 -0
  18. haystack_experimental/core/pipeline/breakpoint.py +119 -0
  19. haystack_experimental/dataclasses/__init__.py +3 -0
  20. haystack_experimental/dataclasses/breakpoints.py +53 -0
  21. {haystack_experimental-0.13.0.dist-info → haystack_experimental-0.14.1.dist-info}/METADATA +29 -14
  22. {haystack_experimental-0.13.0.dist-info → haystack_experimental-0.14.1.dist-info}/RECORD +25 -7
  23. {haystack_experimental-0.13.0.dist-info → haystack_experimental-0.14.1.dist-info}/WHEEL +0 -0
  24. {haystack_experimental-0.13.0.dist-info → haystack_experimental-0.14.1.dist-info}/licenses/LICENSE +0 -0
  25. {haystack_experimental-0.13.0.dist-info → haystack_experimental-0.14.1.dist-info}/licenses/LICENSE-MIT.txt +0 -0
@@ -0,0 +1,317 @@
1
+ # SPDX-FileCopyrightText: 2022-present deepset GmbH <info@deepset.ai>
2
+ #
3
+ # SPDX-License-Identifier: Apache-2.0
4
+
5
+ from typing import Any, Optional
6
+
7
+ from haystack import Document, component, default_from_dict, default_to_dict, logging
8
+ from haystack.components.generators.chat.types import ChatGenerator
9
+ from haystack.components.preprocessors import RecursiveDocumentSplitter
10
+ from haystack.core.serialization import component_to_dict
11
+ from haystack.dataclasses import ChatMessage
12
+ from haystack.utils import deserialize_chatgenerator_inplace
13
+ from tqdm import tqdm
14
+
15
+ logger = logging.getLogger(__name__)
16
+
17
+
18
+ @component
19
+ class LLMSummarizer:
20
+ """
21
+ Summarizes text using a language model.
22
+
23
+ It's inspired by code from the OpenAI blog post: https://cookbook.openai.com/examples/summarizing_long_documents
24
+
25
+ Example
26
+ ```python
27
+ from haystack_experimental.components.summarizers.llm_summarizer import LLMSummarizer
28
+ from haystack.components.generators.chat import OpenAIChatGenerator
29
+ from haystack import Document
30
+
31
+ text = ("Machine learning is a subset of artificial intelligence that provides systems "
32
+ "the ability to automatically learn and improve from experience without being "
33
+ "explicitly programmed. The process of learning begins with observations or data. "
34
+ "Supervised learning algorithms build a mathematical model of sample data, known as "
35
+ "training data, in order to make predictions or decisions. Unsupervised learning "
36
+ "algorithms take a set of data that contains only inputs and find structure in the data. "
37
+ "Reinforcement learning is an area of machine learning where an agent learns to behave "
38
+ "in an environment by performing actions and seeing the results. Deep learning uses "
39
+ "artificial neural networks to model complex patterns in data. Neural networks consist "
40
+ "of layers of connected nodes, each performing a simple computation.")
41
+
42
+ doc = Document(content=text)
43
+ chat_generator = OpenAIChatGenerator(model="gpt-4")
44
+ summarizer = LLMSummarizer(chat_generator=chat_generator)
45
+ summarizer.run(documents=[doc])
46
+ ```
47
+ """
48
+
49
+ def __init__( # pylint: disable=too-many-positional-arguments
50
+ self,
51
+ chat_generator: ChatGenerator,
52
+ system_prompt: Optional[str] = "Rewrite this text in summarized form.",
53
+ summary_detail: float = 0,
54
+ minimum_chunk_size: Optional[int] = 500,
55
+ chunk_delimiter: str = ".",
56
+ summarize_recursively: bool = False,
57
+ split_overlap: int = 0,
58
+ ):
59
+ """
60
+ Initialize the LLMSummarizer component.
61
+
62
+ :param chat_generator: A ChatGenerator instance to use for summarization.
63
+ :param system_prompt: The prompt to instruct the LLM to summarize text, if not given defaults to:
64
+ "Rewrite this text in summarized form."
65
+ :param summary_detail: The level of detail for the summary (0-1), defaults to 0.
66
+ This parameter controls the trade-off between conciseness and completeness by adjusting how many
67
+ chunks the text is divided into. At detail=0, the text is processed as a single chunk (or very few
68
+ chunks), producing the most concise summary. At detail=1, the text is split into the maximum number
69
+ of chunks allowed by minimum_chunk_size, enabling more granular analysis and detailed summaries.
70
+ The formula uses linear interpolation: num_chunks = 1 + detail * (max_chunks - 1), where max_chunks
71
+ is determined by dividing the document length by minimum_chunk_size.
72
+ :param minimum_chunk_size: The minimum token count per chunk, defaults to 500
73
+ :param chunk_delimiter: The character used to determine separator priority.
74
+ "." uses sentence-based splitting, "\n" uses paragraph-based splitting, defaults to "."
75
+ :param summarize_recursively: Whether to use previous summaries as context, defaults to False.
76
+ :param split_overlap: Number of tokens to overlap between consecutive chunks, defaults to 0.
77
+ """
78
+ self._chat_generator = chat_generator
79
+ self.summary_detail = summary_detail
80
+ self.minimum_chunk_size = minimum_chunk_size
81
+ self.chunk_delimiter = chunk_delimiter
82
+ self.system_prompt = system_prompt
83
+ self.summarize_recursively = summarize_recursively
84
+ self.split_overlap = split_overlap
85
+
86
+ # Map chunk_delimiter to an appropriate separator strategy
87
+ separators = LLMSummarizer._get_separators_from_delimiter(chunk_delimiter)
88
+
89
+ # Initialize RecursiveDocumentSplitter
90
+ # Note: split_length will be updated dynamically based on detail parameter
91
+ self._document_splitter = RecursiveDocumentSplitter(
92
+ split_length=minimum_chunk_size if minimum_chunk_size else 500,
93
+ split_overlap=split_overlap,
94
+ split_unit="token",
95
+ separators=separators,
96
+ )
97
+
98
+ @staticmethod
99
+ def _get_separators_from_delimiter(delimiter: str) -> list[str]:
100
+ """
101
+ Map the delimiter to an appropriate list of separators for RecursiveDocumentSplitter.
102
+
103
+ :param delimiter: The delimiter character
104
+ :returns: List of separators in order of preference
105
+ """
106
+ if delimiter == ".":
107
+ # Sentence-focused splitting
108
+ return ["\n\n", "sentence", "\n", " "]
109
+ elif delimiter == "\n":
110
+ # Paragraph-focused splitting
111
+ return ["\n\n", "\n", "sentence", " "]
112
+ else:
113
+ # Custom delimiter - prioritize it
114
+ return ["\n\n", delimiter, "\n", " "]
115
+
116
+ def warm_up(self):
117
+ """
118
+ Warm up the chat generator and document splitter components.
119
+ """
120
+ # Warm up chat generator
121
+ if hasattr(self._chat_generator, "warm_up"):
122
+ self._chat_generator.warm_up()
123
+
124
+ # Warm up document splitter (needed for sentence splitting and tokenization)
125
+ if hasattr(self._document_splitter, "warm_up"):
126
+ self._document_splitter.warm_up()
127
+
128
+ def to_dict(self) -> dict[str, Any]:
129
+ """
130
+ Serializes the component to a dictionary.
131
+
132
+ :returns:
133
+ Dictionary with serialized data.
134
+ """
135
+ return default_to_dict(
136
+ self,
137
+ chat_generator=component_to_dict(obj=self._chat_generator, name="chat_generator"),
138
+ system_prompt=self.system_prompt,
139
+ summary_detail=self.summary_detail,
140
+ minimum_chunk_size=self.minimum_chunk_size,
141
+ chunk_delimiter=self.chunk_delimiter,
142
+ summarize_recursively=self.summarize_recursively,
143
+ split_overlap=self.split_overlap,
144
+ )
145
+
146
+ @classmethod
147
+ def from_dict(cls, data: dict[str, Any]) -> "LLMSummarizer":
148
+ """
149
+ Deserializes the component from a dictionary.
150
+
151
+ :param data: Dictionary with serialized data.
152
+ :returns:
153
+ An instance of the component.
154
+ """
155
+ init_params = data.get("init_parameters", {})
156
+
157
+ # Deserialize chat_generator
158
+ deserialize_chatgenerator_inplace(init_params, key="chat_generator")
159
+
160
+ return default_from_dict(cls, data)
161
+
162
+ def num_tokens(self, text: str) -> int:
163
+ """
164
+ Estimates the token count for a given text.
165
+
166
+ Uses the RecursiveDocumentSplitter's tokenization logic for consistency.
167
+
168
+ :param text: The text to tokenize
169
+ :returns:
170
+ The estimated token count
171
+ """
172
+ # Use the document splitter's tokenization method for consistency
173
+ return self._document_splitter._chunk_length(text)
174
+
175
+ def _prepare_text_chunks(self, text, detail, minimum_chunk_size, chunk_delimiter):
176
+ """
177
+ Prepares text chunks based on detail level using RecursiveDocumentSplitter.
178
+
179
+ The detail parameter (0-1) controls the granularity through linear interpolation:
180
+ - detail=0: Creates fewer, larger chunks (most concise summary)
181
+ - detail=1: Creates more, smaller chunks (most detailed summary)
182
+
183
+ The formula calculates: num_chunks = 1 + detail * (max_chunks - 1), where max_chunks is the
184
+ document_length divided by minimum_chunk_size. This interpolates between processing the entire
185
+ text as one chunk (detail=0) and splitting it into the maximum number of chunks that respect
186
+ the minimum_chunk_size constraint (detail=1). Higher detail values allow the LLM to analyze
187
+ smaller portions of text more carefully, preserving more information at the cost of longer
188
+ processing time and potentially longer summaries.
189
+
190
+ :param text: The text to chunk
191
+ :param detail: Detail level (0-1)
192
+ :param minimum_chunk_size: Minimum token count per chunk
193
+ :param chunk_delimiter: Delimiter for separator selection
194
+ :returns: List of text chunks
195
+ """
196
+ document_length = self.num_tokens(text)
197
+ max_chunks = max(1, document_length // minimum_chunk_size)
198
+ min_chunks = 1
199
+
200
+ num_chunks = int(min_chunks + detail * (max_chunks - min_chunks))
201
+ num_chunks = max(1, num_chunks) # Ensure at least 1 chunk
202
+
203
+ chunk_size = max(minimum_chunk_size, document_length // num_chunks)
204
+ self._document_splitter.split_length = chunk_size
205
+ if chunk_delimiter != self.chunk_delimiter:
206
+ self._document_splitter.separators = self._get_separators_from_delimiter(chunk_delimiter)
207
+
208
+ temp_doc = Document(content=text)
209
+ result = self._document_splitter.run(documents=[temp_doc])
210
+ text_chunks = [doc.content for doc in result["documents"]]
211
+
212
+ return text_chunks
213
+
214
+ def _process_chunks(self, text_chunks, summarize_recursively):
215
+ """
216
+ Processes each chunk individually, asking the LLM to summarize it, and accumulates all the summaries.
217
+
218
+ The parameter `summarize_recursively` allows to use previous summaries as context for the next chunk.
219
+ """
220
+ accumulated_summaries: list[str] = []
221
+
222
+ for chunk in tqdm(text_chunks):
223
+ if summarize_recursively and accumulated_summaries:
224
+ accumulated_summaries_string = "\n\n".join(accumulated_summaries)
225
+ user_message_content = (
226
+ f"Previous summaries:\n\n{accumulated_summaries_string}\n\nText to summarize next:\n\n{chunk}"
227
+ )
228
+ else:
229
+ user_message_content = chunk
230
+
231
+ # prepare the message and make the LLM call
232
+ # self.system_prompt is not None here due to the default value in the constructor
233
+ messages = [ChatMessage.from_system(self.system_prompt), ChatMessage.from_user(user_message_content)] # type: ignore
234
+ result = self._chat_generator.run(messages=messages)
235
+ accumulated_summaries.append(result["replies"][0].text)
236
+
237
+ return accumulated_summaries
238
+
239
+ def summarize(
240
+ self,
241
+ text: str,
242
+ detail: float,
243
+ minimum_chunk_size: int,
244
+ summarize_recursively: bool = False,
245
+ ) -> str:
246
+ """
247
+ Summarizes text by splitting it into optimally-sized chunks and processing each with an LLM.
248
+
249
+ :param text: Text to summarize
250
+ :param detail: Detail level (0-1) where 0 is most concise and 1 is most detailed
251
+ :param minimum_chunk_size: Minimum token count per chunk
252
+ :param summarize_recursively: Whether to use previous summaries as context
253
+
254
+ :returns:
255
+ The textual content summarized by the LLM.
256
+
257
+ :raises ValueError: If detail is not between 0 and 1
258
+ """
259
+
260
+ if not 0 <= detail <= 1:
261
+ raise ValueError("Detail must be between 0 and 1")
262
+
263
+ # calculate "optimal" chunking parameters
264
+ text_chunks = self._prepare_text_chunks(text, detail, minimum_chunk_size, self.chunk_delimiter)
265
+
266
+ # process chunks and accumulate summaries
267
+ accumulated_summaries = self._process_chunks(text_chunks, summarize_recursively)
268
+
269
+ # combine all summaries
270
+ return "\n\n".join(accumulated_summaries)
271
+
272
+ @component.output_types(summary=list[Document])
273
+ def run(
274
+ self,
275
+ *,
276
+ documents: list[Document],
277
+ detail: Optional[float] = None,
278
+ minimum_chunk_size: Optional[int] = None,
279
+ summarize_recursively: Optional[bool] = None,
280
+ system_prompt: Optional[str] = None,
281
+ ) -> dict[str, list[Document]]:
282
+ """
283
+ Run the summarizer on a list of documents.
284
+
285
+ :param documents: List of documents to summarize
286
+ :param detail: The level of detail for the summary (0-1), defaults to 0 overwriting the component's default.
287
+ :param minimum_chunk_size: The minimum token count per chunk, defaults to 500 overwriting the
288
+ component's default.
289
+ :param system_prompt: If given it will overwrite prompt given at init time or the default one.
290
+ :param summarize_recursively: Whether to use previous summaries as context, defaults to False overwriting the
291
+ component's default.
292
+
293
+ :raises RuntimeError: If the component wasn't warmed up.
294
+ """
295
+
296
+ if not self._document_splitter._is_warmed_up:
297
+ raise RuntimeError("The Summarizer component wasn't warmed up. Call 'warm_up()' before calling 'run()'.")
298
+
299
+ # let's allow to change some of the parameters at run time
300
+ detail = self.summary_detail if detail is None else detail
301
+ minimum_chunk_size = self.minimum_chunk_size if minimum_chunk_size is None else minimum_chunk_size
302
+ summarize_recursively = self.summarize_recursively if summarize_recursively is None else summarize_recursively
303
+ self.system_prompt = system_prompt if system_prompt else self.system_prompt
304
+
305
+ for doc in documents:
306
+ if doc.content is None or doc.content == "":
307
+ logger.warning("Document ID {doc_id} has an empty content. Skipping this document.", doc_id=doc.id)
308
+ continue
309
+ summary = self.summarize(
310
+ doc.content,
311
+ detail=detail,
312
+ minimum_chunk_size=minimum_chunk_size, # type: ignore # already checked, cannot be None here
313
+ summarize_recursively=summarize_recursively,
314
+ )
315
+ doc.meta["summary"] = summary
316
+
317
+ return {"documents": documents}
@@ -0,0 +1,3 @@
1
+ # SPDX-FileCopyrightText: 2022-present deepset GmbH <info@deepset.ai>
2
+ #
3
+ # SPDX-License-Identifier: Apache-2.0
@@ -0,0 +1,3 @@
1
+ # SPDX-FileCopyrightText: 2022-present deepset GmbH <info@deepset.ai>
2
+ #
3
+ # SPDX-License-Identifier: Apache-2.0
@@ -0,0 +1,119 @@
1
+ # SPDX-FileCopyrightText: 2022-present deepset GmbH <info@deepset.ai>
2
+ #
3
+ # SPDX-License-Identifier: Apache-2.0
4
+
5
+ from copy import deepcopy
6
+ from dataclasses import replace
7
+ from datetime import datetime
8
+ from typing import TYPE_CHECKING, Any, Optional
9
+
10
+ from haystack import logging
11
+ from haystack.dataclasses.breakpoints import AgentBreakpoint, PipelineSnapshot, PipelineState, ToolBreakpoint
12
+ from haystack.utils.base_serialization import _serialize_value_with_schema
13
+ from haystack.utils.misc import _get_output_dir
14
+
15
+ from haystack_experimental.dataclasses.breakpoints import AgentSnapshot
16
+
17
+ if TYPE_CHECKING:
18
+ from haystack_experimental.components.agents.agent import _ExecutionContext
19
+ from haystack_experimental.components.agents.human_in_the_loop import ToolExecutionDecision
20
+
21
+
22
+ logger = logging.getLogger(__name__)
23
+
24
+
25
+ def _create_agent_snapshot(
26
+ *,
27
+ component_visits: dict[str, int],
28
+ agent_breakpoint: AgentBreakpoint,
29
+ component_inputs: dict[str, Any],
30
+ tool_execution_decisions: Optional[list["ToolExecutionDecision"]] = None,
31
+ ) -> AgentSnapshot:
32
+ """
33
+ Create a snapshot of the agent's state.
34
+
35
+ NOTE: Only difference to Haystack's native implementation is the addition of tool_execution_decisions to the
36
+ AgentSnapshot.
37
+
38
+ :param component_visits: The visit counts for the agent's components.
39
+ :param agent_breakpoint: AgentBreakpoint object containing breakpoints
40
+ :param component_inputs: The inputs to the agent's components.
41
+ :param tool_execution_decisions: Optional list of ToolExecutionDecision objects representing decisions made
42
+ regarding tool executions.
43
+ :return: An AgentSnapshot containing the agent's state and component visits.
44
+ """
45
+ return AgentSnapshot(
46
+ component_inputs={
47
+ "chat_generator": _serialize_value_with_schema(deepcopy(component_inputs["chat_generator"])),
48
+ "tool_invoker": _serialize_value_with_schema(deepcopy(component_inputs["tool_invoker"])),
49
+ },
50
+ component_visits=component_visits,
51
+ break_point=agent_breakpoint,
52
+ timestamp=datetime.now(),
53
+ tool_execution_decisions=tool_execution_decisions,
54
+ )
55
+
56
+
57
+ def _create_pipeline_snapshot_from_tool_invoker(
58
+ *,
59
+ execution_context: "_ExecutionContext",
60
+ tool_name: Optional[str] = None,
61
+ agent_name: Optional[str] = None,
62
+ break_point: Optional[AgentBreakpoint] = None,
63
+ parent_snapshot: Optional[PipelineSnapshot] = None,
64
+ ) -> PipelineSnapshot:
65
+ """
66
+ Create a pipeline snapshot when a tool invoker breakpoint is raised or an exception during execution occurs.
67
+
68
+ :param execution_context: The current execution context of the agent.
69
+ :param tool_name: The name of the tool that triggered the breakpoint, if available.
70
+ :param agent_name: The name of the agent component if present in a pipeline.
71
+ :param break_point: An optional AgentBreakpoint object. If provided, it will be used instead of creating a new one.
72
+ A scenario where a new breakpoint is created is when an exception occurs during tool execution and we want to
73
+ capture the state at that point.
74
+ :param parent_snapshot: An optional parent PipelineSnapshot to build upon.
75
+ :returns:
76
+ A PipelineSnapshot containing the state of the pipeline and agent at the point of the breakpoint or exception.
77
+ """
78
+ if break_point is None:
79
+ agent_breakpoint = AgentBreakpoint(
80
+ agent_name=agent_name or "agent",
81
+ break_point=ToolBreakpoint(
82
+ component_name="tool_invoker",
83
+ visit_count=execution_context.component_visits["tool_invoker"],
84
+ tool_name=tool_name,
85
+ snapshot_file_path=_get_output_dir("pipeline_snapshot"),
86
+ ),
87
+ )
88
+ else:
89
+ agent_breakpoint = break_point
90
+
91
+ messages = execution_context.state.data["messages"]
92
+ agent_snapshot = _create_agent_snapshot(
93
+ component_visits=execution_context.component_visits,
94
+ agent_breakpoint=agent_breakpoint,
95
+ component_inputs={
96
+ "chat_generator": {"messages": messages[:-1], **execution_context.chat_generator_inputs},
97
+ "tool_invoker": {
98
+ "messages": messages[-1:], # tool invoker consumes last msg from the chat_generator, contains tool call
99
+ "state": execution_context.state,
100
+ **execution_context.tool_invoker_inputs,
101
+ },
102
+ },
103
+ tool_execution_decisions=execution_context.tool_execution_decisions,
104
+ )
105
+ if parent_snapshot is None:
106
+ # Create an empty pipeline snapshot if no parent snapshot is provided
107
+ final_snapshot = PipelineSnapshot(
108
+ pipeline_state=PipelineState(inputs={}, component_visits={}, pipeline_outputs={}),
109
+ timestamp=agent_snapshot.timestamp,
110
+ break_point=agent_snapshot.break_point,
111
+ agent_snapshot=agent_snapshot,
112
+ original_input_data={},
113
+ ordered_component_names=[],
114
+ include_outputs_from=set(),
115
+ )
116
+ else:
117
+ final_snapshot = replace(parent_snapshot, agent_snapshot=agent_snapshot)
118
+
119
+ return final_snapshot
@@ -0,0 +1,3 @@
1
+ # SPDX-FileCopyrightText: 2022-present deepset GmbH <info@deepset.ai>
2
+ #
3
+ # SPDX-License-Identifier: Apache-2.0
@@ -0,0 +1,53 @@
1
+ # SPDX-FileCopyrightText: 2022-present deepset GmbH <info@deepset.ai>
2
+ #
3
+ # SPDX-License-Identifier: Apache-2.0
4
+
5
+ from dataclasses import dataclass
6
+ from datetime import datetime
7
+ from typing import Any, Optional
8
+
9
+ from haystack.dataclasses.breakpoints import AgentBreakpoint
10
+ from haystack.dataclasses.breakpoints import AgentSnapshot as HaystackAgentSnapshot
11
+
12
+ from haystack_experimental.components.agents.human_in_the_loop.dataclasses import ToolExecutionDecision
13
+
14
+
15
+ @dataclass
16
+ class AgentSnapshot(HaystackAgentSnapshot):
17
+ tool_execution_decisions: Optional[list[ToolExecutionDecision]] = None
18
+
19
+ def to_dict(self) -> dict[str, Any]:
20
+ """
21
+ Convert the AgentSnapshot to a dictionary representation.
22
+
23
+ :return: A dictionary containing the agent state, timestamp, and breakpoint.
24
+ """
25
+ return {
26
+ "component_inputs": self.component_inputs,
27
+ "component_visits": self.component_visits,
28
+ "break_point": self.break_point.to_dict(),
29
+ "timestamp": self.timestamp.isoformat() if self.timestamp else None,
30
+ "tool_execution_decisions": [ted.to_dict() for ted in self.tool_execution_decisions]
31
+ if self.tool_execution_decisions
32
+ else None,
33
+ }
34
+
35
+ @classmethod
36
+ def from_dict(cls, data: dict) -> "AgentSnapshot":
37
+ """
38
+ Populate the AgentSnapshot from a dictionary representation.
39
+
40
+ :param data: A dictionary containing the agent state, timestamp, and breakpoint.
41
+ :return: An instance of AgentSnapshot.
42
+ """
43
+ return cls(
44
+ component_inputs=data["component_inputs"],
45
+ component_visits=data["component_visits"],
46
+ break_point=AgentBreakpoint.from_dict(data["break_point"]),
47
+ timestamp=datetime.fromisoformat(data["timestamp"]) if data.get("timestamp") else None,
48
+ tool_execution_decisions=[
49
+ ToolExecutionDecision.from_dict(ted) for ted in data.get("tool_execution_decisions", [])
50
+ ]
51
+ if data.get("tool_execution_decisions")
52
+ else None,
53
+ )
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: haystack-experimental
3
- Version: 0.13.0
3
+ Version: 0.14.1
4
4
  Summary: Experimental components and features for the Haystack LLM framework.
5
5
  Project-URL: CI: GitHub, https://github.com/deepset-ai/haystack-experimental/actions
6
6
  Project-URL: GitHub: issues, https://github.com/deepset-ai/haystack-experimental/issues
@@ -25,6 +25,7 @@ Classifier: Programming Language :: Python :: 3.13
25
25
  Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
26
26
  Requires-Python: >=3.9
27
27
  Requires-Dist: haystack-ai
28
+ Requires-Dist: rich
28
29
  Description-Content-Type: text/markdown
29
30
 
30
31
  [![PyPI - Version](https://img.shields.io/pypi/v/haystack-experimental.svg)](https://pypi.org/project/haystack-experimental)
@@ -70,16 +71,19 @@ that includes it. Once it reaches the end of its lifespan, the experiment will b
70
71
 
71
72
  ### Active experiments
72
73
 
73
- | Name | Type | Expected End Date | Dependencies | Cookbook | Discussion |
74
- |---------------------------------------|--------------------------------|-------------------|--------------|-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|---------------|
75
- | [`InMemoryChatMessageStore`][1] | Memory Store | December 2024 | None | <a href="https://colab.research.google.com/github/deepset-ai/haystack-cookbook/blob/main/notebooks/conversational_rag_using_memory.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/> | [Discuss][4] |
76
- | [`ChatMessageRetriever`][2] | Memory Component | December 2024 | None | <a href="https://colab.research.google.com/github/deepset-ai/haystack-cookbook/blob/main/notebooks/conversational_rag_using_memory.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/> | [Discuss][4] |
77
- | [`ChatMessageWriter`][3] | Memory Component | December 2024 | None | <a href="https://colab.research.google.com/github/deepset-ai/haystack-cookbook/blob/main/notebooks/conversational_rag_using_memory.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/> | [Discuss][4] |
78
- | [`QueryExpander`][5] | Query Expansion Component | October 2025 | None | None | [Discuss][6] |
79
- | [`EmbeddingBasedDocumentSplitter`][8] | EmbeddingBasedDocumentSplitter | August 2025 | None | None | [Discuss][7] |
80
- | [`MultiQueryEmbeddingRetriever`][9] | MultiQueryEmbeddingRetriever | November 2025 | None | None | [Discuss][11] |
81
- | [`MultiQueryTextRetriever`][10] | MultiQueryTextRetriever | November 2025 | None | None | [Discuss][12] |
82
- | [`OpenAIChatGenerator`][9] | Chat Generator Component | November 2025 | None | None | [Discuss][10] |
74
+ | Name | Type | Expected End Date | Dependencies | Cookbook | Discussion |
75
+ |-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|--------------------------------|-------------------|--------------|------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|---------------|
76
+ | [`InMemoryChatMessageStore`][1] | Memory Store | December 2024 | None | <a href="https://colab.research.google.com/github/deepset-ai/haystack-cookbook/blob/main/notebooks/conversational_rag_using_memory.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/> | [Discuss][4] |
77
+ | [`ChatMessageRetriever`][2] | Memory Component | December 2024 | None | <a href="https://colab.research.google.com/github/deepset-ai/haystack-cookbook/blob/main/notebooks/conversational_rag_using_memory.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/> | [Discuss][4] |
78
+ | [`ChatMessageWriter`][3] | Memory Component | December 2024 | None | <a href="https://colab.research.google.com/github/deepset-ai/haystack-cookbook/blob/main/notebooks/conversational_rag_using_memory.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/> | [Discuss][4] |
79
+ | [`QueryExpander`][5] | Query Expansion Component | October 2025 | None | None | [Discuss][6] |
80
+ | [`EmbeddingBasedDocumentSplitter`][8] | EmbeddingBasedDocumentSplitter | August 2025 | None | None | [Discuss][7] |
81
+ | [`MultiQueryEmbeddingRetriever`][13] | MultiQueryEmbeddingRetriever | November 2025 | None | None | [Discuss][11] |
82
+ | [`MultiQueryTextRetriever`][14] | MultiQueryTextRetriever | November 2025 | None | None | [Discuss][12] |
83
+ | [`OpenAIChatGenerator`][9] | Chat Generator Component | November 2025 | None | <a href="https://colab.research.google.com/github/deepset-ai/haystack-cookbook/blob/main/notebooks/hallucination_score_calculator.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/> | [Discuss][10] |
84
+ | [`MarkdownHeaderLevelInferrer`][15]                                                                                                                                                      | Preprocessor                   | January 2026      | None         | None                                                                                                                                                                                                                                                   | [Discuss][16] |
85
+ | [`Agent`][17]; [Confirmation Policies][18]; [ConfirmationUIs][19]; [ConfirmationStrategies][20]; [`ConfirmationUIResult` and `ToolExecutionDecision`][21] [HITLBreakpointException][22] | Human in the Loop | December 2025 | rich | None | [Discuss][23] |
86
+ | [`LLMSummarizer`][24]                                                                                                                                                                    | Document Summarizer            | January 2026      | None         | None                                                                                                                                                                                                                                                   | [Discuss][25] |
83
87
 
84
88
  [1]: https://github.com/deepset-ai/haystack-experimental/blob/main/haystack_experimental/chat_message_stores/in_memory.py
85
89
  [2]: https://github.com/deepset-ai/haystack-experimental/blob/main/haystack_experimental/components/retrievers/chat_message_retriever.py
@@ -90,11 +94,22 @@ that includes it. Once it reaches the end of its lifespan, the experiment will b
90
94
  [7]: https://github.com/deepset-ai/haystack-experimental/discussions/356
91
95
  [8]: https://github.com/deepset-ai/haystack-experimental/blob/main/haystack_experimental/components/preprocessors/embedding_based_document_splitter.py
92
96
  [9]: https://github.com/deepset-ai/haystack-experimental/blob/main/haystack_experimental/components/generators/chat/openai.py
93
- [10]: https://github.com/deepset-ai/haystack-experimental/discussions/XXX
94
- [11]: https://github.com/deepset-ai/haystack-experimental/discussions/<>
95
- [12]: https://github.com/deepset-ai/haystack-experimental/discussions/<>
97
+ [10]: https://github.com/deepset-ai/haystack-experimental/discussions/361
98
+ [11]: https://github.com/deepset-ai/haystack-experimental/discussions/363
99
+ [12]: https://github.com/deepset-ai/haystack-experimental/discussions/364
96
100
  [13]: https://github.com/deepset-ai/haystack-experimental/blob/main/haystack_experimental/components/retrievers/multi_query_embedding_retriever.py
97
101
  [14]: https://github.com/deepset-ai/haystack-experimental/blob/main/haystack_experimental/components/retrievers/multi_query_text_retriever.py
102
+ [15]: https://github.com/deepset-ai/haystack-experimental/blob/main/haystack_experimental/components/preprocessors/md_header_level_inferrer.py
103
+ [16]: https://github.com/deepset-ai/haystack-experimental/discussions/376
104
+ [17]: https://github.com/deepset-ai/haystack-experimental/blob/main/haystack_experimental/components/agents/agent.py
105
+ [18]: https://github.com/deepset-ai/haystack-experimental/blob/main/haystack_experimental/components/agents/human_in_the_loop/policies.py
106
+ [19]: https://github.com/deepset-ai/haystack-experimental/blob/main/haystack_experimental/components/agents/human_in_the_loop/user_interfaces.py
107
+ [20]: https://github.com/deepset-ai/haystack-experimental/blob/main/haystack_experimental/components/agents/human_in_the_loop/strategies.py
108
+ [21]: https://github.com/deepset-ai/haystack-experimental/blob/main/haystack_experimental/components/agents/human_in_the_loop/dataclasses.py
109
+ [22]: https://github.com/deepset-ai/haystack-experimental/blob/main/haystack_experimental/components/agents/human_in_the_loop/errors.py
110
+ [23]: https://github.com/deepset-ai/haystack-experimental/discussions/381
111
+ [24]: https://github.com/deepset-ai/haystack-experimental/blob/main/haystack_experimental/components/summarizers/llm_summarizer.py
112
+ [25]: https://github.com/deepset-ai/haystack-experimental/discussions/382
98
113
 
99
114
  ### Adopted experiments
100
115
  | Name | Type | Final release |
@@ -3,14 +3,25 @@ haystack_experimental/chat_message_stores/__init__.py,sha256=sQhjMoaVfC0XsL257eg
3
3
  haystack_experimental/chat_message_stores/in_memory.py,sha256=nc_B_70KOvtgsul4QJb7HihHZrO220HRFsm-aXjYFJk,2305
4
4
  haystack_experimental/chat_message_stores/types.py,sha256=QzjA5-A08PlMAy7MMLNNUpob8S60Ypec74gSbz_l49E,2101
5
5
  haystack_experimental/components/__init__.py,sha256=eHD7xrty2PCky_gG3ty19rpM4WfV32TyytM7gJODwl4,110
6
+ haystack_experimental/components/agents/__init__.py,sha256=Sxu9LxPpQ5cljgoTgUeNC0GY8CwUdiSy1JWkd_-RRJ4,414
7
+ haystack_experimental/components/agents/agent.py,sha256=jJ5baKFgT4PNuf2vu2dlm5AyJ9O7stlK94_1zk2u2OA,31814
8
+ haystack_experimental/components/agents/human_in_the_loop/__init__.py,sha256=xLr1G9pNWMmCpKN9mbv6yqeFfwMcbZyaVfCkzlwMxhY,1674
9
+ haystack_experimental/components/agents/human_in_the_loop/breakpoint.py,sha256=GhNdGdFNDnwSiTukD4WVp6-1YgGjq5oqCEcGMC2dcog,2902
10
+ haystack_experimental/components/agents/human_in_the_loop/dataclasses.py,sha256=OakB0PXBSG0LbQixcuo-d7IC-A3_k6qi80pB8hwY23o,2563
11
+ haystack_experimental/components/agents/human_in_the_loop/errors.py,sha256=HAjD_MCOTBirqnJdxpc2MhqIm-XnU3Soev29wRBWoMw,1066
12
+ haystack_experimental/components/agents/human_in_the_loop/policies.py,sha256=nzblePptT4Fg2GFHa4_SDIK_d7hZ_70qPhkteZBRXWk,3172
13
+ haystack_experimental/components/agents/human_in_the_loop/strategies.py,sha256=KXfMLky27EuxOUhqbHO7oQ3KkL_3lzkwvk1Gk1EMXYY,19643
14
+ haystack_experimental/components/agents/human_in_the_loop/types.py,sha256=aY93Wsd-5BgOiJaaSEGB_bGC-BTx_V_UT1faXtbNNdo,3072
15
+ haystack_experimental/components/agents/human_in_the_loop/user_interfaces.py,sha256=HlJ3-CYNrQGsHOtpvrQE4ayQls8Q3EkLFUkOoRnLVC4,8707
6
16
  haystack_experimental/components/embedders/__init__.py,sha256=eHD7xrty2PCky_gG3ty19rpM4WfV32TyytM7gJODwl4,110
7
17
  haystack_experimental/components/embedders/types/__init__.py,sha256=HGR8aavwIEx7v-8nm5JxFIw47EWn7vAUmywhakTNDCo,182
8
18
  haystack_experimental/components/embedders/types/protocol.py,sha256=EEVtggoYWZL6zF-vbasJollCxLbheMYIISAh7hJ8LkA,1038
9
19
  haystack_experimental/components/generators/__init__.py,sha256=eHD7xrty2PCky_gG3ty19rpM4WfV32TyytM7gJODwl4,110
10
20
  haystack_experimental/components/generators/chat/__init__.py,sha256=LEKI1mMtltVbSiU40QgBfnWC-z3_660TWuV-cVHhdTw,465
11
- haystack_experimental/components/generators/chat/openai.py,sha256=QFb-l_VbTkqi46rqE6rkmMoRkT3fX8kEsStOEGMEBRw,10040
21
+ haystack_experimental/components/generators/chat/openai.py,sha256=gX6UI4yfY0pzKhWErquvPF_gV-3Ut0y6wSJytAD07Jk,9855
12
22
  haystack_experimental/components/preprocessors/__init__.py,sha256=x3fM1lpGzYjWB3hpdbDWxXr_rYASb2e9yX0PgYG84rA,518
13
- haystack_experimental/components/preprocessors/embedding_based_document_splitter.py,sha256=moA7brOJHCsZ2FX7RHJp7Miy6gNQWWyMnsC6KE8MYOQ,16540
23
+ haystack_experimental/components/preprocessors/embedding_based_document_splitter.py,sha256=VyQ--gaMsWid-IRBVXi5YPJpwbFlaK-2mRFvRF8MSBQ,17616
24
+ haystack_experimental/components/preprocessors/md_header_level_inferrer.py,sha256=1Tn-H4Gvg2yYSUc54cPWKTCK78KXet5u32_1S8PM3NU,5643
14
25
  haystack_experimental/components/query/__init__.py,sha256=quaqe16cbtgIdJx7d56CMdk1zZQ6f_3_TICsU0HF_U8,446
15
26
  haystack_experimental/components/query/query_expander.py,sha256=zc9i2zP3ciOWWr029wO_lw3Tl8W3_kQcYcoHDs5Nj8c,12293
16
27
  haystack_experimental/components/retrievers/__init__.py,sha256=CqPvqyvGp5L3Y1gTVQC8DD_xHzbIfTzGlj3oCsZM3J8,528
@@ -19,8 +30,15 @@ haystack_experimental/components/retrievers/multi_query_embedding_retriever.py,s
19
30
  haystack_experimental/components/retrievers/multi_query_text_retriever.py,sha256=5awo60jy0attihYsNCG4LlJGROSjP2mFX2wAQiXYYOc,6557
20
31
  haystack_experimental/components/retrievers/types/__init__.py,sha256=iOngs3gs5enY8y6AWGeyQANTB_9qpXQ0QHSFFDDeEGc,218
21
32
  haystack_experimental/components/retrievers/types/protocol.py,sha256=oUdX_P_pTszzamrkUz3YZsXL3bb4mAYIXsPCtKDH1tw,2375
33
+ haystack_experimental/components/summarizers/__init__.py,sha256=BqnfB0ZMb9ufYUjJ4qmmmRLPXa9FT8XKhMWW8G9Zg9Y,221
34
+ haystack_experimental/components/summarizers/llm_summarizer.py,sha256=Rzl3DKWENBKoAiHvgYPsc4ev0WHZGJZj4PBF-FDHiXI,14392
22
35
  haystack_experimental/components/writers/__init__.py,sha256=iMdeAaZozza8E6dQ4Lc2uOYIFaR95K7bR9mSeuDqSAA,233
23
36
  haystack_experimental/components/writers/chat_message_writer.py,sha256=iu8gmvmRXlqd9S2-9B8p-7C0Y5GTuOI1AqcVKAkrzDc,3502
37
+ haystack_experimental/core/__init__.py,sha256=eHD7xrty2PCky_gG3ty19rpM4WfV32TyytM7gJODwl4,110
38
+ haystack_experimental/core/pipeline/__init__.py,sha256=eHD7xrty2PCky_gG3ty19rpM4WfV32TyytM7gJODwl4,110
39
+ haystack_experimental/core/pipeline/breakpoint.py,sha256=JtwQP8OF5Sdqo0abPRgs1K3SqBkUXhZ53PkeagcK2ZE,5134
40
+ haystack_experimental/dataclasses/__init__.py,sha256=eHD7xrty2PCky_gG3ty19rpM4WfV32TyytM7gJODwl4,110
41
+ haystack_experimental/dataclasses/breakpoints.py,sha256=f0kxYXJRHzk6jAW5Na51MZfUuRIlulhN4oTrGWTpSFE,2095
24
42
  haystack_experimental/super_components/__init__.py,sha256=eHD7xrty2PCky_gG3ty19rpM4WfV32TyytM7gJODwl4,110
25
43
  haystack_experimental/super_components/indexers/__init__.py,sha256=4VPKnuzVb89Zb4PT6ejYT4s0zJ4I3rwFtcLwsCdQKJA,313
26
44
  haystack_experimental/super_components/indexers/sentence_transformers_document_indexer.py,sha256=hfXznLVTgO39xO4GRYgi2Xy-pl4EFKtt13JrGncjvXQ,8519
@@ -30,8 +48,8 @@ haystack_experimental/utils/hallucination_risk_calculator/core_math.py,sha256=8X
30
48
  haystack_experimental/utils/hallucination_risk_calculator/dataclasses.py,sha256=3vk9jsbW-7C9n408Qe730qgdXxIOzsTigf4TMLpryvI,2318
31
49
  haystack_experimental/utils/hallucination_risk_calculator/openai_planner.py,sha256=-yVQsGzM5rXsAVwolE6sp5W6q1yDw66SiIUuUbPk1ng,11413
32
50
  haystack_experimental/utils/hallucination_risk_calculator/skeletonization.py,sha256=qNdBUoFiBjQsI3ovrhd4RyTFmIbv51Goai1Z_l9lG28,5488
33
- haystack_experimental-0.13.0.dist-info/METADATA,sha256=hb72ME6ftVs_CfpXH8iOathCK0B_lMAQkreiciekaJ0,13033
34
- haystack_experimental-0.13.0.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
35
- haystack_experimental-0.13.0.dist-info/licenses/LICENSE,sha256=93_5nS97uHxptHvK9E8BZgKxLGeIS-rBWT2swIv-X5Y,11368
36
- haystack_experimental-0.13.0.dist-info/licenses/LICENSE-MIT.txt,sha256=knmLkIKj_6tTrTSVRg9Tq88Kww4UCPLt2I1RGXJv9sQ,1037
37
- haystack_experimental-0.13.0.dist-info/RECORD,,
51
+ haystack_experimental-0.14.1.dist-info/METADATA,sha256=A1DesX7H05AtVp4S8PD99UIfyFy3cQB_y1a4Oho9Nmc,18566
52
+ haystack_experimental-0.14.1.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
53
+ haystack_experimental-0.14.1.dist-info/licenses/LICENSE,sha256=93_5nS97uHxptHvK9E8BZgKxLGeIS-rBWT2swIv-X5Y,11368
54
+ haystack_experimental-0.14.1.dist-info/licenses/LICENSE-MIT.txt,sha256=knmLkIKj_6tTrTSVRg9Tq88Kww4UCPLt2I1RGXJv9sQ,1037
55
+ haystack_experimental-0.14.1.dist-info/RECORD,,