flowllm 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- flowllm-0.1.0.dist-info/METADATA +597 -0
- flowllm-0.1.0.dist-info/RECORD +66 -0
- flowllm-0.1.0.dist-info/WHEEL +5 -0
- flowllm-0.1.0.dist-info/entry_points.txt +3 -0
- flowllm-0.1.0.dist-info/licenses/LICENSE +201 -0
- flowllm-0.1.0.dist-info/top_level.txt +1 -0
- llmflow/__init__.py +0 -0
- llmflow/app.py +53 -0
- llmflow/config/__init__.py +0 -0
- llmflow/config/config_parser.py +80 -0
- llmflow/config/mock_config.yaml +58 -0
- llmflow/embedding_model/__init__.py +5 -0
- llmflow/embedding_model/base_embedding_model.py +104 -0
- llmflow/embedding_model/openai_compatible_embedding_model.py +95 -0
- llmflow/enumeration/__init__.py +0 -0
- llmflow/enumeration/agent_state.py +8 -0
- llmflow/enumeration/chunk_enum.py +9 -0
- llmflow/enumeration/http_enum.py +9 -0
- llmflow/enumeration/role.py +8 -0
- llmflow/llm/__init__.py +5 -0
- llmflow/llm/base_llm.py +138 -0
- llmflow/llm/openai_compatible_llm.py +283 -0
- llmflow/mcp_server.py +110 -0
- llmflow/op/__init__.py +10 -0
- llmflow/op/base_op.py +125 -0
- llmflow/op/mock_op.py +40 -0
- llmflow/op/prompt_mixin.py +74 -0
- llmflow/op/react/__init__.py +0 -0
- llmflow/op/react/react_v1_op.py +88 -0
- llmflow/op/react/react_v1_prompt.yaml +28 -0
- llmflow/op/vector_store/__init__.py +13 -0
- llmflow/op/vector_store/recall_vector_store_op.py +48 -0
- llmflow/op/vector_store/update_vector_store_op.py +28 -0
- llmflow/op/vector_store/vector_store_action_op.py +46 -0
- llmflow/pipeline/__init__.py +0 -0
- llmflow/pipeline/pipeline.py +94 -0
- llmflow/pipeline/pipeline_context.py +37 -0
- llmflow/schema/__init__.py +0 -0
- llmflow/schema/app_config.py +69 -0
- llmflow/schema/experience.py +144 -0
- llmflow/schema/message.py +68 -0
- llmflow/schema/request.py +32 -0
- llmflow/schema/response.py +29 -0
- llmflow/schema/vector_node.py +11 -0
- llmflow/service/__init__.py +0 -0
- llmflow/service/llmflow_service.py +96 -0
- llmflow/tool/__init__.py +9 -0
- llmflow/tool/base_tool.py +80 -0
- llmflow/tool/code_tool.py +43 -0
- llmflow/tool/dashscope_search_tool.py +162 -0
- llmflow/tool/mcp_tool.py +77 -0
- llmflow/tool/tavily_search_tool.py +109 -0
- llmflow/tool/terminate_tool.py +23 -0
- llmflow/utils/__init__.py +0 -0
- llmflow/utils/common_utils.py +17 -0
- llmflow/utils/file_handler.py +25 -0
- llmflow/utils/http_client.py +156 -0
- llmflow/utils/op_utils.py +102 -0
- llmflow/utils/registry.py +33 -0
- llmflow/utils/singleton.py +9 -0
- llmflow/utils/timer.py +53 -0
- llmflow/vector_store/__init__.py +7 -0
- llmflow/vector_store/base_vector_store.py +136 -0
- llmflow/vector_store/chroma_vector_store.py +188 -0
- llmflow/vector_store/es_vector_store.py +227 -0
- llmflow/vector_store/file_vector_store.py +163 -0
@@ -0,0 +1,163 @@
|
|
1
|
+
import math
|
2
|
+
from pathlib import Path
|
3
|
+
from typing import List, Iterable
|
4
|
+
|
5
|
+
from loguru import logger
|
6
|
+
from pydantic import Field, model_validator
|
7
|
+
|
8
|
+
from llmflow.embedding_model.openai_compatible_embedding_model import OpenAICompatibleEmbeddingModel
|
9
|
+
from llmflow.schema.vector_node import VectorNode
|
10
|
+
from llmflow.vector_store import VECTOR_STORE_REGISTRY
|
11
|
+
from llmflow.vector_store.base_vector_store import BaseVectorStore
|
12
|
+
|
13
|
+
|
14
|
+
@VECTOR_STORE_REGISTRY.register("local_file")
class FileVectorStore(BaseVectorStore):
    """Vector store that keeps every workspace in one file under ``store_dir``.

    The class is registered in ``VECTOR_STORE_REGISTRY`` under the name
    ``"local_file"``. A workspace lives at ``<store_dir>/<workspace_id>.jsonl``.
    Reading and writing of that file is delegated to ``_load_from_path`` and
    ``_dump_to_path``, which are not defined in this class (presumably they
    come from ``BaseVectorStore`` -- confirm there for the on-disk format).
    Search is a brute-force cosine-similarity scan over all stored nodes.
    """

    # Directory that holds the per-workspace files.
    store_dir: str = Field(default="./file_vector_store")

    @model_validator(mode="after")
    def init_client(self):
        """Create ``store_dir`` (including missing parents) after validation."""
        self.store_path.mkdir(parents=True, exist_ok=True)
        return self

    @property
    def store_path(self) -> Path:
        """``store_dir`` as a :class:`pathlib.Path`."""
        return Path(self.store_dir)

    def exist_workspace(self, workspace_id: str, **kwargs) -> bool:
        """Return True if ``<store_dir>/<workspace_id>.jsonl`` exists."""
        workspace_path = self.store_path / f"{workspace_id}.jsonl"
        return workspace_path.exists()

    def delete_workspace(self, workspace_id: str, **kwargs):
        """Remove the workspace file; do nothing if it is not a regular file."""
        workspace_path = self.store_path / f"{workspace_id}.jsonl"
        if workspace_path.is_file():
            workspace_path.unlink()

    def create_workspace(self, workspace_id: str, **kwargs):
        """Create the workspace by dumping an empty node list for it."""
        self._dump_to_path(nodes=[], workspace_id=workspace_id, path=self.store_path, **kwargs)

    def _iter_workspace_nodes(self, workspace_id: str, **kwargs) -> Iterable[VectorNode]:
        """Yield every node that ``_load_from_path`` returns for the workspace."""
        yield from self._load_from_path(path=self.store_path, workspace_id=workspace_id, **kwargs)

    @staticmethod
    def calculate_similarity(query_vector: List[float], node_vector: List[float]) -> float:
        """Return the cosine similarity of two non-empty, equal-length vectors.

        Args:
            query_vector: Embedding of the query.
            node_vector: Embedding of a stored node.

        Returns:
            ``dot(q, n) / (|q| * |n|)``, or ``0.0`` when either norm is zero
            (cosine similarity is undefined there; 0.0 avoids a
            ``ZeroDivisionError`` during a scan).

        Raises:
            AssertionError: If either vector is empty or the lengths differ.
        """
        # Raised explicitly (not via ``assert``) so the checks still run under
        # ``python -O``; AssertionError is kept so existing callers see the
        # same exception type and message as before.
        if not query_vector:
            raise AssertionError("query_vector is empty!")
        if not node_vector:
            raise AssertionError("node_vector is empty!")
        if len(query_vector) != len(node_vector):
            raise AssertionError(
                f"query_vector.size={len(query_vector)} node_vector.size={len(node_vector)}")

        dot_product = sum(x * y for x, y in zip(query_vector, node_vector))
        query_norm = math.sqrt(sum(x ** 2 for x in query_vector))
        node_norm = math.sqrt(sum(y ** 2 for y in node_vector))

        denominator = query_norm * node_norm
        if denominator == 0:
            return 0.0
        return dot_product / denominator

    def search(self, query: str, workspace_id: str, top_k: int = 1, **kwargs) -> List[VectorNode]:
        """Return the ``top_k`` stored nodes most similar to ``query``.

        The query is embedded with ``self.embedding_model.get_embeddings``
        (assumed to return a single vector for a single string -- confirm
        against the embedding model). Every loaded node gets its similarity
        written to ``node.metadata["score"]`` before ranking.

        Args:
            query: Text to search for.
            workspace_id: Workspace to scan.
            top_k: Maximum number of nodes to return.
            **kwargs: Forwarded to ``_load_from_path``.

        Returns:
            Nodes sorted by descending score, truncated to ``top_k``.
        """
        query_vector = self.embedding_model.get_embeddings(query)
        nodes: List[VectorNode] = []
        for node in self._load_from_path(path=self.store_path, workspace_id=workspace_id, **kwargs):
            node.metadata["score"] = self.calculate_similarity(query_vector, node.vector)
            nodes.append(node)

        nodes = sorted(nodes, key=lambda x: x.metadata["score"], reverse=True)
        return nodes[:top_k]

    def insert(self, nodes: VectorNode | List[VectorNode], workspace_id: str, **kwargs):
        """Embed ``nodes`` and upsert them into the workspace by ``unique_id``.

        Existing nodes are loaded, nodes with a matching ``unique_id`` are
        replaced, new ones are added, and the merged set is written back.

        Args:
            nodes: One node or a list of nodes to store.
            workspace_id: Target workspace.
            **kwargs: Forwarded to ``_dump_to_path``.
        """
        if isinstance(nodes, VectorNode):
            nodes = [nodes]

        embedded_nodes: List[VectorNode] = self.embedding_model.get_node_embeddings(nodes)
        exist_nodes: List[VectorNode] = list(
            self._load_from_path(path=self.store_path, workspace_id=workspace_id))
        all_node_dict = {node.unique_id: node for node in exist_nodes}

        # Counted inside the loop on purpose: the dict grows as we go, so a
        # duplicate id inside the incoming batch also counts as an update.
        update_cnt = 0
        for node in embedded_nodes:
            if node.unique_id in all_node_dict:
                update_cnt += 1
            all_node_dict[node.unique_id] = node

        self._dump_to_path(nodes=list(all_node_dict.values()),
                           workspace_id=workspace_id,
                           path=self.store_path,
                           **kwargs)

        logger.info(f"update workspace_id={workspace_id} nodes.size={len(embedded_nodes)} "
                    f"all.size={len(all_node_dict)} update_cnt={update_cnt}")

    def delete(self, node_ids: str | List[str], workspace_id: str, **kwargs):
        """Remove the nodes whose ``unique_id`` is in ``node_ids``.

        Logs a warning and returns without writing when the workspace file
        does not exist.

        Args:
            node_ids: One id or a list of ids to remove.
            workspace_id: Workspace to modify.
            **kwargs: Forwarded to ``_dump_to_path``.
        """
        if not self.exist_workspace(workspace_id=workspace_id):
            logger.warning(f"workspace_id={workspace_id} is not exists!")
            return

        if isinstance(node_ids, str):
            node_ids = [node_ids]
        # Set lookup keeps the filter linear in the number of stored nodes.
        ids_to_remove = set(node_ids)

        all_nodes: List[VectorNode] = list(
            self._load_from_path(path=self.store_path, workspace_id=workspace_id))
        before_size = len(all_nodes)
        all_nodes = [n for n in all_nodes if n.unique_id not in ids_to_remove]
        after_size = len(all_nodes)

        self._dump_to_path(nodes=all_nodes, workspace_id=workspace_id, path=self.store_path, **kwargs)
        logger.info(f"delete workspace_id={workspace_id} before_size={before_size} after_size={after_size}")
|
106
|
+
|
107
|
+
|
108
|
+
def main():
    """Demo run: fill a scratch workspace, query it, dump it, then remove it.

    ``load_dotenv()`` is called first, before the embedding model is built.
    The workspace is deleted and recreated up front so the run always starts
    empty, and deleted again at the end.
    """
    from dotenv import load_dotenv

    load_dotenv()

    embedder = OpenAICompatibleEmbeddingModel(dimensions=64, model_name="text-embedding-v4")
    workspace_id = "rag_nodes_index"
    client = FileVectorStore(embedding_model=embedder)

    # Reset the workspace.
    client.delete_workspace(workspace_id)
    client.create_workspace(workspace_id)

    # (content, node_type) pairs for the demo nodes.
    samples = (
        ("Artificial intelligence is a technology that simulates human intelligence.", "n1"),
        ("AI is the future of mankind.", "n1"),
        ("I want to eat fish!", "n2"),
        ("The bigger the storm, the more expensive the fish.", "n1"),
    )
    sample_nodes = [
        VectorNode(workspace_id=workspace_id, content=text, metadata={"node_type": node_type})
        for text, node_type in samples
    ]
    client.insert(sample_nodes, workspace_id)

    separator = "=" * 20
    logger.info(separator)
    for hit in client.search("What is AI?", workspace_id=workspace_id, top_k=5):
        # The embedding vector is left out of the log output.
        logger.info(hit.model_dump(exclude={"vector"}))
    logger.info(separator)
    client.dump_workspace(workspace_id)

    client.delete_workspace(workspace_id)


if __name__ == "__main__":
    main()
|
163
|
+
# launch with: python -m llmflow.vector_store.file_vector_store
|