flock-core 0.3.11__py3-none-any.whl → 0.3.14__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of flock-core has been flagged as potentially problematic; consult the registry's advisory page for details.

@@ -0,0 +1,195 @@
1
+ import re
2
+ from typing import Any
3
+
4
+ from flock.core.logging.trace_and_logged import traced_and_logged
5
+
6
+
7
+ @traced_and_logged
8
def split_markdown_by_headers(
    markdown_text: str, min_header_level: int = 1, max_header_level: int = 2
) -> list[dict[str, Any]]:
    """Split markdown text into sections at ATX headers.

    Args:
        markdown_text: The markdown document to split.
        min_header_level: Smallest header level (number of ``#``) that starts
            a new section.
        max_header_level: Largest header level that starts a new section.

    Returns:
        A list of ``{"title", "content", "level"}`` dicts in document order.
        Text before the first header becomes a level-0 "Preamble" chunk; if
        no header matches, the whole text is returned as one level-0 "Text"
        chunk. Empty input yields an empty list.
    """
    if not markdown_text:
        return []

    # Match headers whose '#' run length lies in [min, max]. The run is
    # captured so the level can be recovered via len(group(1)); a deeper
    # header (extra '#') fails the \s+ requirement, so '###' is never
    # partially matched when max_header_level == 2.
    # NOTE: the previous f-string built a pattern containing the literal
    # text {'1,2'}, which never matched real headers.
    header_pattern = re.compile(
        rf"^(#{{{min_header_level},{max_header_level}}})\s+(.+)$",
        re.MULTILINE,
    )

    headers = list(header_pattern.finditer(markdown_text))

    if not headers:
        return [{"title": "Text", "content": markdown_text, "level": 0}]

    chunks: list[dict[str, Any]] = []

    for i, current_header in enumerate(headers):
        header_text = current_header.group(2).strip()
        header_level = len(current_header.group(1))

        # Section content runs from the end of this header line to the start
        # of the next matched header (or to the end of the text).
        if i < len(headers) - 1:
            content = markdown_text[current_header.end() : headers[i + 1].start()]
        else:
            content = markdown_text[current_header.end() :]

        chunks.append(
            {
                "title": header_text,
                "content": content.strip(),
                "level": header_level,
            }
        )

    # Preserve any non-empty text that appears before the first header.
    if headers[0].start() > 0:
        preamble = markdown_text[: headers[0].start()].strip()
        if preamble:
            chunks.insert(
                0, {"title": "Preamble", "content": preamble, "level": 0}
            )

    return chunks
58
+
59
+
60
+ @traced_and_logged
61
+ def extract_code_blocks(
62
+ markdown_text: str, language: str = None
63
+ ) -> list[dict[str, str]]:
64
+ if not markdown_text:
65
+ return []
66
+
67
+ # Pattern to match markdown code blocks
68
+ if language:
69
+ # Match only code blocks with the specified language
70
+ pattern = rf"```{language}\s*([\s\S]*?)\s*```"
71
+ else:
72
+ # Match all code blocks, capturing the language specifier if present
73
+ pattern = r"```(\w*)\s*([\s\S]*?)\s*```"
74
+
75
+ blocks = []
76
+
77
+ if language:
78
+ # If language is specified, we only capture the code content
79
+ matches = re.finditer(pattern, markdown_text)
80
+ for match in matches:
81
+ blocks.append(
82
+ {"language": language, "code": match.group(1).strip()}
83
+ )
84
+ else:
85
+ # If no language is specified, we capture both language and code content
86
+ matches = re.finditer(pattern, markdown_text)
87
+ for match in matches:
88
+ lang = match.group(1).strip() if match.group(1) else "text"
89
+ blocks.append({"language": lang, "code": match.group(2).strip()})
90
+
91
+ return blocks
92
+
93
+
94
+ @traced_and_logged
95
def extract_links(markdown_text: str) -> list[dict[str, str]]:
    """Collect inline markdown links ``[text](url)`` from *markdown_text*.

    Returns one ``{"text": ..., "url": ...}`` dict per link, in document
    order; falsy input yields an empty list.
    """
    if not markdown_text:
        return []

    results = []
    # finditer preserves document order, same as findall on this pattern.
    for match in re.finditer(r"\[([^\]]+)\]\(([^)]+)\)", markdown_text):
        label, target = match.groups()
        results.append({"text": label, "url": target})
    return results
104
+
105
+
106
+ @traced_and_logged
107
def extract_tables(markdown_text: str) -> list[dict[str, Any]]:
    """Parse GitHub-style pipe tables out of markdown text.

    Args:
        markdown_text: The markdown document to scan.

    Returns:
        One dict per table: ``{"headers": [...], "rows": [{header: cell}]}``.
        The first pipe row of a table is the header; the alignment row is
        skipped; short data rows are padded with "". Empty input yields
        an empty list.
    """
    if not markdown_text:
        return []

    lines = markdown_text.split("\n")

    tables: list[dict[str, Any]] = []
    current_table: list[dict[str, str]] | None = None
    header_row: list[str] | None = None

    # A separator/alignment row consists solely of cells like ---, :--,
    # --:, :-:. Checking per cell (instead of the old substring test for
    # "|--"/"|:-") also recognizes the common "| --- |" spacing, which was
    # previously swallowed into the rows as dash data.
    separator_cell = re.compile(r"^:?-+:?$")

    for line in lines:
        line = line.strip()

        # Table rows start and end with a pipe.
        if line.startswith("|") and line.endswith("|"):
            cells = [cell.strip() for cell in line.strip("|").split("|")]
            if current_table is None:
                # First pipe row of a new table is the header row.
                current_table = []
                header_row = cells
            elif all(separator_cell.match(cell) for cell in cells):
                # Alignment row (e.g. "| --- | :-: |") — carries no data.
                pass
            else:
                # Data row: map header names to cell values, padding rows
                # that have fewer cells than the header.
                row_dict = {
                    header: cells[i] if i < len(cells) else ""
                    for i, header in enumerate(header_row)
                }
                current_table.append(row_dict)
        else:
            # Any non-pipe line terminates the current table.
            if current_table is not None:
                tables.append({"headers": header_row, "rows": current_table})
                current_table = None
                header_row = None

    # Flush a table that runs to the end of the document.
    if current_table is not None:
        tables.append({"headers": header_row, "rows": current_table})

    return tables
157
+
158
+
159
+ @traced_and_logged
160
def markdown_to_plain_text(markdown_text: str) -> str:
    """Strip markdown syntax from *markdown_text*, yielding plain text.

    Headers, emphasis, links, code fences/spans, list markers, blockquote
    markers and HTML tags are removed; bullet items get a leading bullet
    character, links become ``text (url)``, and runs of three or more
    newlines collapse to two. Empty input yields "".
    """
    if not markdown_text:
        return ""

    # (pattern, replacement, flags) applied in order; order matters —
    # e.g. bold (**/__) must be stripped before italic (*/_).
    transforms = [
        (r"^#{1,6}\s+(.+)$", r"\1", re.MULTILINE),      # headers
        (r"\*\*(.*?)\*\*", r"\1", 0),                   # bold
        (r"__(.*?)__", r"\1", 0),                       # bold (alt)
        (r"\*(.*?)\*", r"\1", 0),                       # italic
        (r"_(.*?)_", r"\1", 0),                         # italic (alt)
        (r"\[(.*?)\]\((.*?)\)", r"\1 (\2)", 0),         # links -> text (url)
        (r"```(?:\w+)?\s*([\s\S]*?)\s*```", r"\1", 0),  # fenced code
        (r"`([^`]*?)`", r"\1", 0),                      # inline code
        (r"^[\*\-\+]\s+(.+)$", r"• \1", re.MULTILINE),  # bullets
        (r"^\d+\.\s+(.+)$", r"\1", re.MULTILINE),       # numbered lists
        (r"^>\s+(.+)$", r"\1", re.MULTILINE),           # blockquotes
        (r"<.*?>", "", 0),                              # HTML tags
        (r"\n{3,}", "\n\n", 0),                         # collapse blank runs
    ]

    text = markdown_text
    for pattern, replacement, flags in transforms:
        text = re.sub(pattern, replacement, text, flags=flags)

    return text.strip()
@@ -0,0 +1,88 @@
1
+ from typing import Any, Literal
2
+
3
+ from pydantic import Field
4
+
5
+ from flock.core.flock_agent import FlockAgent
6
+ from flock.core.flock_evaluator import FlockEvaluator, FlockEvaluatorConfig
7
+ from flock.core.mixin.dspy_integration import DSPyIntegrationMixin
8
+ from flock.core.mixin.prompt_parser import PromptParserMixin
9
+ from flock.modules.memory.memory_module import MemoryModule, MemoryModuleConfig
10
+
11
+
12
class MemoryEvaluatorConfig(FlockEvaluatorConfig):
    """Configuration for the memory evaluator.

    The fields mirror ``MemoryModuleConfig`` one-to-one, so the evaluator
    can construct a memory module directly from this configuration.
    """

    # Where the persisted memory artifacts are written.
    folder_path: str = Field(
        default="concept_memory/",
        description="Directory where memory file and concept graph will be saved",
    )
    concept_graph_file: str = Field(
        default="concept_graph.png",
        description="Base filename for the concept graph image",
    )

    # Memory store location and lookup behavior.
    file_path: str | None = Field(
        default="agent_memory.json", description="Path to save memory file"
    )
    memory_mapping: str | None = Field(
        default=None, description="Memory mapping configuration"
    )
    similarity_threshold: float = Field(
        default=0.5, description="Threshold for semantic similarity"
    )
    # Entries longer than max_length are split according to splitting_mode.
    max_length: int = Field(
        default=1000, description="Max length of memory entry before splitting"
    )
    save_after_update: bool = Field(
        default=True, description="Whether to save memory after each update"
    )
    splitting_mode: Literal["summary", "semantic", "characters", "none"] = (
        Field(default="none", description="Mode to split memory content")
    )
    # When True, the memory can be searched but not written to.
    enable_read_only_mode: bool = Field(
        default=False, description="Whether to enable read only mode"
    )
    number_of_concepts_to_extract: int = Field(
        default=3, description="Number of concepts to extract from the memory"
    )
46
+
47
+
48
class MemoryEvaluator(FlockEvaluator, DSPyIntegrationMixin, PromptParserMixin):
    """Evaluator backed by a concept-graph memory module."""

    config: MemoryEvaluatorConfig = Field(
        default_factory=MemoryEvaluatorConfig,
        description="Evaluator configuration",
    )

    async def evaluate(
        self, agent: FlockAgent, inputs: dict[str, Any], tools: list[Any]
    ) -> dict[str, Any]:
        """Evaluate against the memory concept graph.

        If ``inputs`` contains "query", memory is searched and the facts
        are returned; if it contains "data", the data is added to memory
        (and, when both keys are present, the add-confirmation message is
        what gets returned).
        """
        cfg = self.config
        # Build the module configuration from our own (field-for-field copy).
        module_config = MemoryModuleConfig(
            folder_path=cfg.folder_path,
            concept_graph_file=cfg.concept_graph_file,
            file_path=cfg.file_path,
            memory_mapping=cfg.memory_mapping,
            similarity_threshold=cfg.similarity_threshold,
            max_length=cfg.max_length,
            save_after_update=cfg.save_after_update,
            splitting_mode=cfg.splitting_mode,
            enable_read_only_mode=cfg.enable_read_only_mode,
            number_of_concepts_to_extract=cfg.number_of_concepts_to_extract,
        )
        memory_module = MemoryModule(name=self.name, config=module_config)

        result: dict[str, Any] = {}
        if "query" in inputs:
            result = {"facts": await memory_module.search_memory(agent, inputs)}
        if "data" in inputs:
            await memory_module.add_to_memory(agent, inputs)
            result = {"message": "Data added to memory"}
        return result