flock-core 0.3.11-py3-none-any.whl → 0.3.13-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of flock-core might be problematic.
- flock/core/flock.py +15 -0
- flock/core/flock_agent.py +2 -1
- flock/core/flock_api.py +0 -1
- flock/core/logging/logging.py +172 -17
- flock/core/tools/basic_tools.py +8 -0
- flock/core/tools/llm_tools.py +788 -0
- flock/core/tools/markdown_tools.py +195 -0
- flock/evaluators/memory/memory_evaluator.py +88 -0
- flock/modules/memory/memory_module.py +257 -95
- flock/modules/memory/memory_parser.py +1 -1
- {flock_core-0.3.11.dist-info → flock_core-0.3.13.dist-info}/METADATA +2 -1
- {flock_core-0.3.11.dist-info → flock_core-0.3.13.dist-info}/RECORD +15 -12
- {flock_core-0.3.11.dist-info → flock_core-0.3.13.dist-info}/WHEEL +0 -0
- {flock_core-0.3.11.dist-info → flock_core-0.3.13.dist-info}/entry_points.txt +0 -0
- {flock_core-0.3.11.dist-info → flock_core-0.3.13.dist-info}/licenses/LICENSE +0 -0
flock/core/tools/markdown_tools.py (new file)
@@ -0,0 +1,195 @@
+import re
+from typing import Any
+
+from flock.core.logging.trace_and_logged import traced_and_logged
+
+
+@traced_and_logged
+def split_markdown_by_headers(
+    markdown_text: str, min_header_level: int = 1, max_header_level: int = 2
+) -> list[dict[str, Any]]:
+    if not markdown_text:
+        return []
+
+    # Pattern to match headers from level min_header_level to max_header_level
+    header_pattern = re.compile(
+        rf"^(#{{{min_header_level},{max_header_level}}})\s+(.+)$",
+        re.MULTILINE,
+    )
+
+    # Find all headers
+    headers = list(header_pattern.finditer(markdown_text))
+
+    if not headers:
+        return [{"title": "Text", "content": markdown_text, "level": 0}]
+
+    chunks = []
+
+    # Process each section
+    for i in range(len(headers)):
+        current_header = headers[i]
+        header_text = current_header.group(2).strip()
+        header_level = len(current_header.group(1))
+
+        # Determine section content
+        if i < len(headers) - 1:
+            next_header_start = headers[i + 1].start()
+            content = markdown_text[current_header.end() : next_header_start]
+        else:
+            content = markdown_text[current_header.end() :]
+
+        chunks.append(
+            {
+                "title": header_text,
+                "content": content.strip(),
+                "level": header_level,
+            }
+        )
+
+    # Check if there's content before the first header
+    if headers[0].start() > 0:
+        preamble = markdown_text[: headers[0].start()].strip()
+        if preamble:
+            chunks.insert(
+                0, {"title": "Preamble", "content": preamble, "level": 0}
+            )
+
+    return chunks
+
+
+@traced_and_logged
+def extract_code_blocks(
+    markdown_text: str, language: str = None
+) -> list[dict[str, str]]:
+    if not markdown_text:
+        return []
+
+    # Pattern to match markdown code blocks
+    if language:
+        # Match only code blocks with the specified language
+        pattern = rf"```{language}\s*([\s\S]*?)\s*```"
+    else:
+        # Match all code blocks, capturing the language specifier if present
+        pattern = r"```(\w*)\s*([\s\S]*?)\s*```"
+
+    blocks = []
+
+    if language:
+        # If language is specified, we only capture the code content
+        matches = re.finditer(pattern, markdown_text)
+        for match in matches:
+            blocks.append(
+                {"language": language, "code": match.group(1).strip()}
+            )
+    else:
+        # If no language is specified, we capture both language and code content
+        matches = re.finditer(pattern, markdown_text)
+        for match in matches:
+            lang = match.group(1).strip() if match.group(1) else "text"
+            blocks.append({"language": lang, "code": match.group(2).strip()})
+
+    return blocks
+
+
+@traced_and_logged
+def extract_links(markdown_text: str) -> list[dict[str, str]]:
+    if not markdown_text:
+        return []
+
+    # Pattern to match markdown links [text](url)
+    link_pattern = re.compile(r"\[([^\]]+)\]\(([^)]+)\)")
+    matches = link_pattern.findall(markdown_text)
+
+    return [{"text": text, "url": url} for text, url in matches]
+
+
+@traced_and_logged
+def extract_tables(markdown_text: str) -> list[dict[str, Any]]:
+    if not markdown_text:
+        return []
+
+    # Split the text by lines
+    lines = markdown_text.split("\n")
+
+    tables = []
+    current_table = None
+    header_row = None
+
+    for line in lines:
+        line = line.strip()
+
+        # Table rows are indicated by starting with |
+        if line.startswith("|") and line.endswith("|"):
+            if current_table is None:
+                current_table = []
+                # This is the header row
+                header_row = [
+                    cell.strip() for cell in line.strip("|").split("|")
+                ]
+            elif "|--" in line or "|:-" in line:
+                # This is the separator row, ignore it
+                pass
+            else:
+                # This is a data row
+                row_data = [cell.strip() for cell in line.strip("|").split("|")]
+
+                # Create a dictionary mapping headers to values
+                row_dict = {}
+                for i, header in enumerate(header_row):
+                    if i < len(row_data):
+                        row_dict[header] = row_data[i]
+                    else:
+                        row_dict[header] = ""
+
+                current_table.append(row_dict)
+        else:
+            # End of table
+            if current_table is not None:
+                tables.append({"headers": header_row, "rows": current_table})
+                current_table = None
+                header_row = None
+
+    # Don't forget to add the last table if we're at the end of the document
+    if current_table is not None:
+        tables.append({"headers": header_row, "rows": current_table})
+
+    return tables
+
+
+@traced_and_logged
+def markdown_to_plain_text(markdown_text: str) -> str:
+    if not markdown_text:
+        return ""
+
+    # Replace headers
+    text = re.sub(r"^#{1,6}\s+(.+)$", r"\1", markdown_text, flags=re.MULTILINE)
+
+    # Replace bold and italic
+    text = re.sub(r"\*\*(.*?)\*\*", r"\1", text)
+    text = re.sub(r"__(.*?)__", r"\1", text)
+    text = re.sub(r"\*(.*?)\*", r"\1", text)
+    text = re.sub(r"_(.*?)_", r"\1", text)
+
+    # Replace links
+    text = re.sub(r"\[(.*?)\]\((.*?)\)", r"\1 (\2)", text)
+
+    # Replace code blocks
+    text = re.sub(r"```(?:\w+)?\s*([\s\S]*?)\s*```", r"\1", text)
+    text = re.sub(r"`([^`]*?)`", r"\1", text)
+
+    # Replace bullet points
+    text = re.sub(r"^[\*\-\+]\s+(.+)$", r"• \1", text, flags=re.MULTILINE)
+
+    # Replace numbered lists (dropping the numbers)
+    text = re.sub(r"^\d+\.\s+(.+)$", r"\1", text, flags=re.MULTILINE)
+
+    # Replace blockquotes
+    text = re.sub(r"^>\s+(.+)$", r"\1", text, flags=re.MULTILINE)
+
+    # Remove HTML tags
+    text = re.sub(r"<.*?>", "", text)
+
+    # Normalize whitespace
+    text = re.sub(r"\n{3,}", "\n\n", text)
+
+    return text.strip()
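
A minimal usage sketch for these helpers (illustrative only: the sample document, variable names, and expected results below are assumptions, not part of the package; the functions are wrapped by @traced_and_logged, so flock's tracing/logging machinery is active when they run):

from flock.core.tools.markdown_tools import (
    extract_code_blocks,
    extract_links,
    split_markdown_by_headers,
)

# Hypothetical sample document.
doc = (
    "# Intro\n"
    "Some text with a [link](https://example.com).\n"
    "\n"
    "## Setup\n"
    "```python\n"
    'print("hello")\n'
    "```\n"
)

# One chunk per header; "level" is the number of leading hashes.
sections = split_markdown_by_headers(doc, min_header_level=1, max_header_level=2)
# -> [{"title": "Intro", ...}, {"title": "Setup", ...}]

links = extract_links(doc)
# -> [{"text": "link", "url": "https://example.com"}]

blocks = extract_code_blocks(doc, language="python")
# -> [{"language": "python", "code": 'print("hello")'}]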

flock/evaluators/memory/memory_evaluator.py (new file)
@@ -0,0 +1,88 @@
+from typing import Any, Literal
+
+from pydantic import Field
+
+from flock.core.flock_agent import FlockAgent
+from flock.core.flock_evaluator import FlockEvaluator, FlockEvaluatorConfig
+from flock.core.mixin.dspy_integration import DSPyIntegrationMixin
+from flock.core.mixin.prompt_parser import PromptParserMixin
+from flock.modules.memory.memory_module import MemoryModule, MemoryModuleConfig
+
+
+class MemoryEvaluatorConfig(FlockEvaluatorConfig):
+    folder_path: str = Field(
+        default="concept_memory/",
+        description="Directory where memory file and concept graph will be saved",
+    )
+    concept_graph_file: str = Field(
+        default="concept_graph.png",
+        description="Base filename for the concept graph image",
+    )
+
+    file_path: str | None = Field(
+        default="agent_memory.json", description="Path to save memory file"
+    )
+    memory_mapping: str | None = Field(
+        default=None, description="Memory mapping configuration"
+    )
+    similarity_threshold: float = Field(
+        default=0.5, description="Threshold for semantic similarity"
+    )
+    max_length: int = Field(
+        default=1000, description="Max length of memory entry before splitting"
+    )
+    save_after_update: bool = Field(
+        default=True, description="Whether to save memory after each update"
+    )
+    splitting_mode: Literal["summary", "semantic", "characters", "none"] = (
+        Field(default="none", description="Mode to split memory content")
+    )
+    enable_read_only_mode: bool = Field(
+        default=False, description="Whether to enable read only mode"
+    )
+    number_of_concepts_to_extract: int = Field(
+        default=3, description="Number of concepts to extract from the memory"
+    )
+
+
+class MemoryEvaluator(FlockEvaluator, DSPyIntegrationMixin, PromptParserMixin):
+    """Evaluator that uses DSPy for generation."""
+
+    config: MemoryEvaluatorConfig = Field(
+        default_factory=MemoryEvaluatorConfig,
+        description="Evaluator configuration",
+    )
+
+    async def evaluate(
+        self, agent: FlockAgent, inputs: dict[str, Any], tools: list[Any]
+    ) -> dict[str, Any]:
+        """Simple evaluator that uses a memory concept graph.
+
+        If inputs contain "query", it searches memory for the query and returns the facts.
+        If inputs contain "data", it adds the data to memory.
+        """
+        result = {}
+        memory_module = MemoryModule(
+            name=self.name,
+            config=MemoryModuleConfig(
+                folder_path=self.config.folder_path,
+                concept_graph_file=self.config.concept_graph_file,
+                file_path=self.config.file_path,
+                memory_mapping=self.config.memory_mapping,
+                similarity_threshold=self.config.similarity_threshold,
+                max_length=self.config.max_length,
+                save_after_update=self.config.save_after_update,
+                splitting_mode=self.config.splitting_mode,
+                enable_read_only_mode=self.config.enable_read_only_mode,
+                number_of_concepts_to_extract=self.config.number_of_concepts_to_extract,
+            ),
+        )
+
+        if "query" in inputs:
+            facts = await memory_module.search_memory(agent, inputs)
+            result = {"facts": facts}
+
+        if "data" in inputs:
+            await memory_module.add_to_memory(agent, inputs)
+            result = {"message": "Data added to memory"}
+        return result
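
A hedged sketch of driving the new MemoryEvaluator (assumptions: the evaluator accepts name and config as keyword arguments, as pydantic models typically do, and agent is an already-constructed FlockAgent whose setup is not shown in this diff):

from flock.evaluators.memory.memory_evaluator import (
    MemoryEvaluator,
    MemoryEvaluatorConfig,
)

evaluator = MemoryEvaluator(
    name="memory_eval",  # assumed keyword; the class reads self.name above
    config=MemoryEvaluatorConfig(similarity_threshold=0.7),
)

async def demo(agent):
    # "data" branch: writes the payload into the concept memory
    await evaluator.evaluate(agent, {"data": "Flock agents are declarative."}, [])
    # "query" branch: returns {"facts": ...} retrieved from memory
    return await evaluator.evaluate(agent, {"query": "What are Flock agents?"}, [])

# Run with e.g. asyncio.run(demo(agent)) once an agent exists.

Note that evaluate() builds a fresh MemoryModule on every call, forwarding each MemoryEvaluatorConfig field into MemoryModuleConfig, so persistence comes from the configured folder_path/file_path rather than from evaluator state.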