flowllm 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (66) hide show
  1. flowllm-0.1.0.dist-info/METADATA +597 -0
  2. flowllm-0.1.0.dist-info/RECORD +66 -0
  3. flowllm-0.1.0.dist-info/WHEEL +5 -0
  4. flowllm-0.1.0.dist-info/entry_points.txt +3 -0
  5. flowllm-0.1.0.dist-info/licenses/LICENSE +201 -0
  6. flowllm-0.1.0.dist-info/top_level.txt +1 -0
  7. llmflow/__init__.py +0 -0
  8. llmflow/app.py +53 -0
  9. llmflow/config/__init__.py +0 -0
  10. llmflow/config/config_parser.py +80 -0
  11. llmflow/config/mock_config.yaml +58 -0
  12. llmflow/embedding_model/__init__.py +5 -0
  13. llmflow/embedding_model/base_embedding_model.py +104 -0
  14. llmflow/embedding_model/openai_compatible_embedding_model.py +95 -0
  15. llmflow/enumeration/__init__.py +0 -0
  16. llmflow/enumeration/agent_state.py +8 -0
  17. llmflow/enumeration/chunk_enum.py +9 -0
  18. llmflow/enumeration/http_enum.py +9 -0
  19. llmflow/enumeration/role.py +8 -0
  20. llmflow/llm/__init__.py +5 -0
  21. llmflow/llm/base_llm.py +138 -0
  22. llmflow/llm/openai_compatible_llm.py +283 -0
  23. llmflow/mcp_server.py +110 -0
  24. llmflow/op/__init__.py +10 -0
  25. llmflow/op/base_op.py +125 -0
  26. llmflow/op/mock_op.py +40 -0
  27. llmflow/op/prompt_mixin.py +74 -0
  28. llmflow/op/react/__init__.py +0 -0
  29. llmflow/op/react/react_v1_op.py +88 -0
  30. llmflow/op/react/react_v1_prompt.yaml +28 -0
  31. llmflow/op/vector_store/__init__.py +13 -0
  32. llmflow/op/vector_store/recall_vector_store_op.py +48 -0
  33. llmflow/op/vector_store/update_vector_store_op.py +28 -0
  34. llmflow/op/vector_store/vector_store_action_op.py +46 -0
  35. llmflow/pipeline/__init__.py +0 -0
  36. llmflow/pipeline/pipeline.py +94 -0
  37. llmflow/pipeline/pipeline_context.py +37 -0
  38. llmflow/schema/__init__.py +0 -0
  39. llmflow/schema/app_config.py +69 -0
  40. llmflow/schema/experience.py +144 -0
  41. llmflow/schema/message.py +68 -0
  42. llmflow/schema/request.py +32 -0
  43. llmflow/schema/response.py +29 -0
  44. llmflow/schema/vector_node.py +11 -0
  45. llmflow/service/__init__.py +0 -0
  46. llmflow/service/llmflow_service.py +96 -0
  47. llmflow/tool/__init__.py +9 -0
  48. llmflow/tool/base_tool.py +80 -0
  49. llmflow/tool/code_tool.py +43 -0
  50. llmflow/tool/dashscope_search_tool.py +162 -0
  51. llmflow/tool/mcp_tool.py +77 -0
  52. llmflow/tool/tavily_search_tool.py +109 -0
  53. llmflow/tool/terminate_tool.py +23 -0
  54. llmflow/utils/__init__.py +0 -0
  55. llmflow/utils/common_utils.py +17 -0
  56. llmflow/utils/file_handler.py +25 -0
  57. llmflow/utils/http_client.py +156 -0
  58. llmflow/utils/op_utils.py +102 -0
  59. llmflow/utils/registry.py +33 -0
  60. llmflow/utils/singleton.py +9 -0
  61. llmflow/utils/timer.py +53 -0
  62. llmflow/vector_store/__init__.py +7 -0
  63. llmflow/vector_store/base_vector_store.py +136 -0
  64. llmflow/vector_store/chroma_vector_store.py +188 -0
  65. llmflow/vector_store/es_vector_store.py +227 -0
  66. llmflow/vector_store/file_vector_store.py +163 -0
@@ -0,0 +1,136 @@
1
+ import fcntl
2
+ import json
3
+ from abc import ABC
4
+ from pathlib import Path
5
+ from typing import List, Iterable
6
+
7
+ from loguru import logger
8
+ from pydantic import BaseModel, Field
9
+ from tqdm import tqdm
10
+
11
+ from llmflow.embedding_model.base_embedding_model import BaseEmbeddingModel
12
+ from llmflow.schema.vector_node import VectorNode
13
+
14
+
15
class BaseVectorStore(BaseModel, ABC):
    """Base class for workspace-scoped vector stores.

    Concrete stores implement the workspace primitives (``exist_workspace``,
    ``delete_workspace``, ``create_workspace``, ``_iter_workspace_nodes``) and
    the node operations (``search``, ``insert``, ``delete``).  This class
    builds JSONL dump/load and workspace copy on top of those primitives.
    """

    # Embedding model made available to subclasses; optional at construction.
    embedding_model: BaseEmbeddingModel | None = Field(default=None)
    # Maximum number of nodes buffered before each insert in copy_workspace.
    batch_size: int = Field(default=1024)

    @staticmethod
    def _load_from_path(workspace_id: str, path: str | Path, callback_fn=None, **kwargs) -> Iterable[VectorNode]:
        """Lazily yield the nodes stored in ``<path>/<workspace_id>.jsonl``.

        Args:
            workspace_id: Workspace name; also the stem of the JSONL file and
                the value assigned to every yielded node's ``workspace_id``.
            path: Directory that contains the JSONL file.
            callback_fn: Optional ``dict -> VectorNode`` converter.  When
                omitted, each decoded dict is passed to ``VectorNode``.
            **kwargs: Extra keyword arguments for ``VectorNode``; only used
                when ``callback_fn`` is not given.

        Yields:
            One node per non-blank line.  Nothing is yielded (and a warning is
            logged) when the file does not exist.
        """
        workspace_path = Path(path) / f"{workspace_id}.jsonl"
        if not workspace_path.exists():
            logger.warning(f"workspace_path={workspace_path} is not exists!")
            return

        # The dump side writes with ensure_ascii=False by default, so read with
        # an explicit encoding instead of the locale default.
        with workspace_path.open(encoding="utf-8") as f:
            # Shared lock: several readers may load at once, writers are excluded.
            fcntl.flock(f, fcntl.LOCK_SH)
            try:
                for line in tqdm(f, desc="load from path"):
                    line = line.strip()
                    if not line:
                        continue
                    node_dict = json.loads(line)
                    if callback_fn:
                        node = callback_fn(node_dict)
                    else:
                        node = VectorNode(**node_dict, **kwargs)
                    node.workspace_id = workspace_id
                    yield node

            finally:
                fcntl.flock(f, fcntl.LOCK_UN)

    @staticmethod
    def _dump_to_path(nodes: Iterable[VectorNode], workspace_id: str, path: str | Path = "", callback_fn=None,
                      ensure_ascii: bool = False, **kwargs):
        """Write ``nodes`` to ``<path>/<workspace_id>.jsonl``, one JSON object per line.

        Args:
            nodes: Nodes to serialize; each gets ``workspace_id`` assigned.
            workspace_id: Workspace name, used as the file stem.
            path: Target directory; created if missing.
            callback_fn: Optional ``VectorNode -> dict`` converter.  When
                omitted, ``node.model_dump()`` is used.
            ensure_ascii: Forwarded to ``json.dumps``.
            **kwargs: Extra keyword arguments forwarded to ``json.dumps``.

        Returns:
            ``{"size": <number of nodes written>}``.

        Raises:
            TypeError: If the serialized form of a node is not a ``dict``.
        """
        dump_path: Path = Path(path)
        dump_path.mkdir(parents=True, exist_ok=True)
        dump_file = dump_path / f"{workspace_id}.jsonl"

        count = 0
        # Open in append mode so the existing content is NOT discarded before the
        # exclusive lock is held; opening with "w" would truncate the file while
        # a reader may still hold the shared lock.
        with dump_file.open("a", encoding="utf-8") as f:
            fcntl.flock(f, fcntl.LOCK_EX)
            try:
                f.truncate(0)
                for node in tqdm(nodes, desc="dump to path"):
                    node.workspace_id = workspace_id
                    if callback_fn:
                        node_dict = callback_fn(node)
                    else:
                        node_dict = node.model_dump()
                    # Explicit check instead of `assert`, which is stripped under -O.
                    if not isinstance(node_dict, dict):
                        raise TypeError(f"node must serialize to a dict, got {type(node_dict).__name__}")
                    f.write(json.dumps(node_dict, ensure_ascii=ensure_ascii, **kwargs))
                    f.write("\n")
                    count += 1

                return {"size": count}
            finally:
                fcntl.flock(f, fcntl.LOCK_UN)

    def exist_workspace(self, workspace_id: str, **kwargs) -> bool:
        """Return whether the workspace exists.  Must be implemented by subclasses."""
        raise NotImplementedError

    def delete_workspace(self, workspace_id: str, **kwargs):
        """Remove the workspace.  Must be implemented by subclasses."""
        raise NotImplementedError

    def create_workspace(self, workspace_id: str, **kwargs):
        """Create the workspace.  Must be implemented by subclasses."""
        raise NotImplementedError

    def _iter_workspace_nodes(self, workspace_id: str, **kwargs) -> Iterable[VectorNode]:
        """Iterate over the nodes of a workspace.  Must be implemented by subclasses."""
        raise NotImplementedError

    def dump_workspace(self, workspace_id: str, path: str | Path = "", callback_fn=None, **kwargs):
        """Export a workspace to ``<path>/<workspace_id>.jsonl``.

        ``**kwargs`` is forwarded unchanged to ``exist_workspace``,
        ``_iter_workspace_nodes`` and ``_dump_to_path``.

        Returns:
            ``{"size": n}`` on success, or ``{}`` when the workspace does not exist.
        """
        if not self.exist_workspace(workspace_id=workspace_id, **kwargs):
            logger.warning(f"workspace_id={workspace_id} is not exist!")
            return {}

        return self._dump_to_path(nodes=self._iter_workspace_nodes(workspace_id=workspace_id, **kwargs),
                                  workspace_id=workspace_id,
                                  path=path,
                                  callback_fn=callback_fn,
                                  **kwargs)

    def load_workspace(self, workspace_id: str, path: str | Path = "", nodes: List[VectorNode] | None = None,
                       callback_fn=None, **kwargs):
        """Rebuild a workspace from explicit ``nodes`` plus a JSONL dump.

        Any existing workspace with the same id is deleted and recreated first,
        then ``nodes`` (if given) followed by the nodes read from
        ``<path>/<workspace_id>.jsonl`` are inserted in a single call.

        Returns:
            ``{"size": <number of nodes inserted>}``.
        """
        if self.exist_workspace(workspace_id, **kwargs):
            self.delete_workspace(workspace_id=workspace_id, **kwargs)
            logger.info(f"delete workspace_id={workspace_id}")

        self.create_workspace(workspace_id=workspace_id, **kwargs)

        all_nodes: List[VectorNode] = []
        if nodes:
            all_nodes.extend(nodes)
        all_nodes.extend(
            self._load_from_path(path=path, workspace_id=workspace_id, callback_fn=callback_fn, **kwargs))

        # Skip the backend call when there is nothing to insert; an empty batch
        # has no effect and some backends reject it.
        if all_nodes:
            self.insert(nodes=all_nodes, workspace_id=workspace_id, **kwargs)
        return {"size": len(all_nodes)}

    def copy_workspace(self, src_workspace_id: str, dest_workspace_id: str, **kwargs):
        """Copy every node of ``src_workspace_id`` into ``dest_workspace_id``.

        The destination is created when missing.  Nodes are inserted in
        batches of at most ``batch_size``.

        Returns:
            ``{"size": n}`` with the number of copied nodes, or ``{}`` when the
            source workspace does not exist.
        """
        if not self.exist_workspace(workspace_id=src_workspace_id, **kwargs):
            logger.warning(f"src_workspace_id={src_workspace_id} is not exist!")
            return {}

        if not self.exist_workspace(dest_workspace_id, **kwargs):
            self.create_workspace(workspace_id=dest_workspace_id, **kwargs)

        batch: List[VectorNode] = []
        node_size = 0
        for node in self._iter_workspace_nodes(workspace_id=src_workspace_id, **kwargs):
            batch.append(node)
            node_size += 1
            if len(batch) >= self.batch_size:
                self.insert(nodes=batch, workspace_id=dest_workspace_id, **kwargs)
                # Start a fresh list rather than clearing in place, so an
                # implementation that keeps a reference to the batch is unaffected.
                batch = []

        if batch:
            self.insert(nodes=batch, workspace_id=dest_workspace_id, **kwargs)
        return {"size": node_size}

    def search(self, query: str, workspace_id: str, top_k: int = 1, **kwargs) -> List[VectorNode]:
        """Return up to ``top_k`` nodes matching ``query``.  Must be implemented by subclasses."""
        raise NotImplementedError

    def insert(self, nodes: VectorNode | List[VectorNode], workspace_id: str, **kwargs):
        """Insert one node or a list of nodes.  Must be implemented by subclasses."""
        raise NotImplementedError

    def delete(self, node_ids: str | List[str], workspace_id: str, **kwargs):
        """Delete one node id or a list of node ids.  Must be implemented by subclasses."""
        raise NotImplementedError
136
+
@@ -0,0 +1,188 @@
1
+ from typing import List, Iterable
2
+
3
+ import chromadb
4
+ from chromadb import Collection
5
+ from chromadb.config import Settings
6
+ from loguru import logger
7
+ from pydantic import Field, PrivateAttr, model_validator
8
+
9
+ from llmflow.embedding_model.openai_compatible_embedding_model import OpenAICompatibleEmbeddingModel
10
+ from llmflow.schema.vector_node import VectorNode
11
+ from llmflow.vector_store import VECTOR_STORE_REGISTRY
12
+ from llmflow.vector_store.base_vector_store import BaseVectorStore
13
+
14
+
15
@VECTOR_STORE_REGISTRY.register("chroma")
class ChromaVectorStore(BaseVectorStore):
    """Vector store backed by chromadb; each workspace maps to one collection."""

    # Directory handed to chromadb as ``persist_directory``.
    store_dir: str = Field(default="./chroma_vector_store")
    # Cache of collection handles keyed by workspace id.
    collections: dict = Field(default_factory=dict)
    _client: chromadb.Client = PrivateAttr()

    @model_validator(mode="after")
    def init_client(self):
        """Create the chromadb client once the model fields are validated."""
        # NOTE(review): whether `persist_directory` alone enables on-disk
        # persistence depends on the installed chromadb version - confirm.
        self._client = chromadb.Client(Settings(persist_directory=self.store_dir))
        return self

    def _get_collection(self, workspace_id: str) -> Collection:
        """Return the cached collection for ``workspace_id``, creating it if needed."""
        if workspace_id not in self.collections:
            self.collections[workspace_id] = self._client.get_or_create_collection(workspace_id)
        return self.collections[workspace_id]

    def exist_workspace(self, workspace_id: str, **kwargs) -> bool:
        """Return True when a collection named ``workspace_id`` exists."""
        # Depending on the chromadb release, list_collections() yields either
        # Collection objects or plain collection names; accept both.
        names = {getattr(c, "name", c) for c in self._client.list_collections()}
        return workspace_id in names

    def delete_workspace(self, workspace_id: str, **kwargs):
        """Delete the collection and drop its cached handle."""
        self._client.delete_collection(workspace_id)
        self.collections.pop(workspace_id, None)

    def create_workspace(self, workspace_id: str, **kwargs):
        """Create (or fetch) the collection and cache its handle."""
        self.collections[workspace_id] = self._client.get_or_create_collection(workspace_id)

    def _iter_workspace_nodes(self, workspace_id: str, **kwargs) -> Iterable[VectorNode]:
        """Yield every node of the workspace with id, content and metadata.

        NOTE(review): ``collection.get()`` is called without ``include``, so the
        stored embeddings are presumably not returned and the yielded nodes
        carry no vector - confirm against the chromadb version in use.
        """
        collection: Collection = self._get_collection(workspace_id)
        results = collection.get()
        for unique_id, content, metadata in zip(results["ids"], results["documents"], results["metadatas"]):
            yield VectorNode(workspace_id=workspace_id,
                             unique_id=unique_id,
                             content=content,
                             # Entries stored without metadata come back as None.
                             metadata=metadata or {})

    def search(self, query: str, workspace_id: str, top_k: int = 1, **kwargs) -> List[VectorNode]:
        """Return up to ``top_k`` nodes closest to the embedding of ``query``.

        Returns an empty list (and logs a warning) when the workspace does not exist.
        """
        if not self.exist_workspace(workspace_id=workspace_id):
            logger.warning(f"workspace_id={workspace_id} is not exists!")
            return []

        collection: Collection = self._get_collection(workspace_id)
        query_vector = self.embedding_model.get_embeddings(query)
        results = collection.query(query_embeddings=[query_vector], n_results=top_k)

        # A single query embedding was sent, so only the first result row is used.
        hits = zip(results["ids"][0], results["documents"][0], results["metadatas"][0])
        return [
            VectorNode(workspace_id=workspace_id,
                       unique_id=unique_id,
                       content=content,
                       metadata=metadata or {})
            for unique_id, content, metadata in hits
        ]

    def insert(self, nodes: VectorNode | List[VectorNode], workspace_id: str, **kwargs):
        """Add nodes to the workspace, embedding those that have no vector yet.

        The workspace is created when missing.  An empty ``nodes`` list is a no-op.
        """
        if not self.exist_workspace(workspace_id=workspace_id):
            self.create_workspace(workspace_id=workspace_id)

        if isinstance(nodes, VectorNode):
            nodes = [nodes]
        if not nodes:
            # chromadb rejects an add() with empty id/embedding lists.
            return

        embedded_nodes = [node for node in nodes if node.vector]
        not_embedded_nodes = [node for node in nodes if not node.vector]
        # Only call the embedding model when there is something to embed.
        now_embedded_nodes = (
            self.embedding_model.get_node_embeddings(not_embedded_nodes) if not_embedded_nodes else []
        )
        all_nodes = embedded_nodes + now_embedded_nodes

        collection: Collection = self._get_collection(workspace_id)
        collection.add(ids=[n.unique_id for n in all_nodes],
                       embeddings=[n.vector for n in all_nodes],
                       documents=[n.content for n in all_nodes],
                       # chromadb validation refuses empty dicts; None means "no metadata".
                       metadatas=[n.metadata or None for n in all_nodes])

    def delete(self, node_ids: str | List[str], workspace_id: str, **kwargs):
        """Delete the given node id(s); logs a warning when the workspace is missing."""
        if not self.exist_workspace(workspace_id=workspace_id):
            logger.warning(f"workspace_id={workspace_id} is not exists!")
            return

        if isinstance(node_ids, str):
            node_ids = [node_ids]

        collection: Collection = self._get_collection(workspace_id)
        collection.delete(ids=node_ids)
97
+
98
+
99
def main():
    """Manual smoke test: insert, search, replace one node, dump and drop a workspace."""
    from dotenv import load_dotenv
    load_dotenv()

    embedding_model = OpenAICompatibleEmbeddingModel(dimensions=64, model_name="text-embedding-v4")
    workspace_id = "chroma_test_index"
    separator = "=" * 20

    chroma_store = ChromaVectorStore(embedding_model=embedding_model, store_dir="./chroma_test_db")

    # Always start from an empty workspace.
    if chroma_store.exist_workspace(workspace_id):
        chroma_store.delete_workspace(workspace_id)
    chroma_store.create_workspace(workspace_id)

    # (unique_id, content, node_type, category)
    fixtures = [
        ("node1", "Artificial intelligence is a technology that simulates human intelligence.", "n1", "tech"),
        ("node2", "AI is the future of mankind.", "n1", "tech"),
        ("node3", "I want to eat fish!", "n2", "food"),
        ("node4", "The bigger the storm, the more expensive the fish.", "n1", "food"),
    ]
    sample_nodes = [
        VectorNode(unique_id=unique_id,
                   workspace_id=workspace_id,
                   content=content,
                   metadata={"node_type": node_type, "category": category})
        for unique_id, content, node_type, category in fixtures
    ]
    chroma_store.insert(sample_nodes, workspace_id=workspace_id)

    logger.info(separator)
    for hit in chroma_store.search("What is AI?", top_k=5, workspace_id=workspace_id):
        logger.info(hit.model_dump(exclude={"vector"}))
    logger.info(separator)

    # Replace node2 by deleting the old entry and inserting the new version.
    node2_update = VectorNode(
        unique_id="node2",
        workspace_id=workspace_id,
        content="AI is the future of humanity and technology.",
        metadata={"node_type": "n1", "category": "tech", "updated": True},
    )
    chroma_store.delete(node2_update.unique_id, workspace_id=workspace_id)
    chroma_store.insert(node2_update, workspace_id=workspace_id)

    logger.info("Updated Result:")
    for hit in chroma_store.search("fish?", top_k=10, workspace_id=workspace_id):
        logger.info(hit.model_dump(exclude={"vector"}))
    logger.info(separator)

    chroma_store.dump_workspace(workspace_id=workspace_id)

    chroma_store.delete_workspace(workspace_id=workspace_id)


if __name__ == "__main__":
    main()
    # launch with: python -m llmflow.vector_store.chroma_vector_store
@@ -0,0 +1,227 @@
1
+ import os
2
+ from typing import List, Tuple, Iterable
3
+
4
+ from elasticsearch import Elasticsearch
5
+ from elasticsearch.helpers import bulk
6
+ from loguru import logger
7
+ from pydantic import Field, PrivateAttr, model_validator
8
+
9
+ from llmflow.embedding_model.openai_compatible_embedding_model import OpenAICompatibleEmbeddingModel
10
+ from llmflow.schema.vector_node import VectorNode
11
+ from llmflow.vector_store import VECTOR_STORE_REGISTRY
12
+ from llmflow.vector_store.base_vector_store import BaseVectorStore
13
+
14
+
15
@VECTOR_STORE_REGISTRY.register("elasticsearch")
class EsVectorStore(BaseVectorStore):
    """Vector store backed by Elasticsearch; each workspace maps to one index."""

    # One host URL or a list of them; defaults to $ES_HOSTS or a local node.
    hosts: str | List[str] = Field(default_factory=lambda: os.getenv("ES_HOSTS", "http://localhost:9200"))
    basic_auth: str | Tuple[str, str] | None = Field(default=None)
    # Filters queued by add_term_filter/add_range_filter for the NEXT search only.
    retrieve_filters: List[dict] = Field(default_factory=list)
    _client: Elasticsearch = PrivateAttr()

    @model_validator(mode="after")
    def init_client(self):
        """Normalize ``hosts`` to a list and create the Elasticsearch client."""
        if isinstance(self.hosts, str):
            self.hosts = [self.hosts]
        self._client = Elasticsearch(hosts=self.hosts, basic_auth=self.basic_auth)
        return self

    def exist_workspace(self, workspace_id: str, **kwargs) -> bool:
        """Return True when the index ``workspace_id`` exists."""
        # The client may return a response wrapper rather than a plain bool.
        return bool(self._client.indices.exists(index=workspace_id))

    def delete_workspace(self, workspace_id: str, **kwargs):
        """Delete the index; ``**kwargs`` is forwarded to ``indices.delete``."""
        return self._client.indices.delete(index=workspace_id, **kwargs)

    def create_workspace(self, workspace_id: str, **kwargs):
        """Create the index with a dense_vector mapping sized by the embedding model."""
        body = {
            "mappings": {
                "properties": {
                    "workspace_id": {"type": "keyword"},
                    "content": {"type": "text"},
                    "metadata": {"type": "object"},
                    "vector": {
                        "type": "dense_vector",
                        "dims": self.embedding_model.dimensions
                    }
                }
            }
        }
        return self._client.indices.create(index=workspace_id, body=body)

    def _iter_workspace_nodes(self, workspace_id: str, max_size: int = 10000, **kwargs) -> Iterable[VectorNode]:
        """Yield the nodes of the workspace using a single ``match_all`` search.

        At most ``max_size`` nodes are returned; larger workspaces are
        truncated because only one search page is requested.
        """
        response = self._client.search(index=workspace_id, body={"query": {"match_all": {}}, "size": max_size})
        for doc in response['hits']['hits']:
            node = self.doc2node(doc, workspace_id)
            # The score of a match_all query is not a similarity; do not let it
            # end up in dumped or copied metadata.
            node.metadata.pop("_score", None)
            yield node

    def refresh(self, workspace_id: str):
        """Refresh the index so that recent writes become searchable."""
        self._client.indices.refresh(index=workspace_id)

    @staticmethod
    def doc2node(doc, workspace_id: str) -> VectorNode:
        """Convert an Elasticsearch hit into a ``VectorNode``.

        The hit's ``_score`` (when set) is stored in ``metadata["_score"]``
        minus 1, undoing the ``+ 1.0`` offset applied by the search script.
        """
        node = VectorNode(**doc["_source"])
        node.workspace_id = workspace_id
        node.unique_id = doc["_id"]
        # `_score` can be present but null; only record real scores.
        score = doc.get("_score")
        if score is not None:
            node.metadata["_score"] = score - 1
        return node

    def add_term_filter(self, key: str, value):
        """Queue an exact-match filter for the next search; returns ``self`` for chaining."""
        if key:
            self.retrieve_filters.append({"term": {key: value}})
        return self

    def add_range_filter(self, key: str, gte=None, lte=None):
        """Queue a range filter for the next search; returns ``self`` for chaining.

        Nothing is queued when both ``gte`` and ``lte`` are None.
        """
        if key:
            bounds = {}
            if gte is not None:
                bounds["gte"] = gte
            if lte is not None:
                bounds["lte"] = lte
            if bounds:
                self.retrieve_filters.append({"range": {key: bounds}})
        return self

    def clear_filter(self):
        """Drop all queued filters; returns ``self`` for chaining."""
        self.retrieve_filters.clear()
        return self

    def search(self, query: str, workspace_id: str, top_k: int = 1, **kwargs) -> List[VectorNode]:
        """Return up to ``top_k`` nodes ranked by cosine similarity to ``query``.

        Queued filters are applied to this search and always cleared
        afterwards, including when the workspace is missing or the request
        raises, so they never leak into a later search.  ``**kwargs`` is
        forwarded to the client's ``search`` call.
        """
        try:
            if not self.exist_workspace(workspace_id=workspace_id):
                logger.warning(f"workspace_id={workspace_id} is not exists!")
                return []

            query_vector = self.embedding_model.get_embeddings(query)
            body = {
                "query": {
                    "script_score": {
                        # Copy so the request body is independent of the clear() below.
                        "query": {"bool": {"must": list(self.retrieve_filters)}},
                        "script": {
                            "source": "cosineSimilarity(params.query_vector, 'vector') + 1.0",
                            "params": {"query_vector": query_vector},
                        }
                    }
                },
                "size": top_k
            }
            response = self._client.search(index=workspace_id, body=body, **kwargs)
            return [self.doc2node(doc, workspace_id) for doc in response['hits']['hits']]
        finally:
            self.retrieve_filters.clear()

    def insert(self, nodes: VectorNode | List[VectorNode], workspace_id: str, refresh: bool = False, **kwargs):
        """Index nodes into the workspace, embedding those that have no vector yet.

        The index is created when missing.  Documents are written with the
        ``index`` bulk operation keyed by ``unique_id``.  ``**kwargs`` is
        forwarded to ``bulk``.  With ``refresh=True`` the index is refreshed
        afterwards.
        """
        if not self.exist_workspace(workspace_id=workspace_id):
            self.create_workspace(workspace_id=workspace_id)

        if isinstance(nodes, VectorNode):
            nodes = [nodes]

        embedded_nodes = [node for node in nodes if node.vector]
        not_embedded_nodes = [node for node in nodes if not node.vector]
        # Only call the embedding model when there is something to embed.
        now_embedded_nodes = (
            self.embedding_model.get_node_embeddings(not_embedded_nodes) if not_embedded_nodes else []
        )

        docs = [
            {
                "_op_type": "index",
                "_index": workspace_id,
                "_id": node.unique_id,
                "_source": {
                    "workspace_id": workspace_id,
                    "content": node.content,
                    "metadata": node.metadata,
                    "vector": node.vector
                }
            } for node in embedded_nodes + now_embedded_nodes]
        status, error = bulk(self._client, docs, chunk_size=self.batch_size, **kwargs)
        logger.info(f"insert docs.size={len(docs)} status={status} error={error}")

        if refresh:
            self.refresh(workspace_id=workspace_id)

    def delete(self, node_ids: str | List[str], workspace_id: str, refresh: bool = False, **kwargs):
        """Bulk-delete the given node id(s); logs a warning when the workspace is missing.

        ``**kwargs`` is forwarded to ``bulk``.  With ``refresh=True`` the index
        is refreshed afterwards.
        """
        if not self.exist_workspace(workspace_id=workspace_id):
            logger.warning(f"workspace_id={workspace_id} is not exists!")
            return

        if isinstance(node_ids, str):
            node_ids = [node_ids]

        actions = [
            {
                "_op_type": "delete",
                "_index": workspace_id,
                "_id": node_id
            } for node_id in node_ids]
        status, error = bulk(self._client, actions, chunk_size=self.batch_size, **kwargs)
        logger.info(f"delete actions.size={len(actions)} status={status} error={error}")

        if refresh:
            self.refresh(workspace_id=workspace_id)
162
+
163
+
164
+ def main():
165
+ from dotenv import load_dotenv
166
+ load_dotenv()
167
+
168
+ embedding_model = OpenAICompatibleEmbeddingModel(dimensions=64, model_name="text-embedding-v4")
169
+ workspace_id = "rag_nodes_index"
170
+ hosts = "http://11.160.132.46:8200"
171
+ es = EsVectorStore(hosts=hosts, embedding_model=embedding_model)
172
+ if es.exist_workspace(workspace_id=workspace_id):
173
+ es.delete_workspace(workspace_id=workspace_id)
174
+ es.create_workspace(workspace_id=workspace_id)
175
+
176
+ sample_nodes = [
177
+ VectorNode(
178
+ workspace_id=workspace_id,
179
+ content="Artificial intelligence is a technology that simulates human intelligence.",
180
+ metadata={
181
+ "node_type": "n1",
182
+ }
183
+ ),
184
+ VectorNode(
185
+ workspace_id=workspace_id,
186
+ content="AI is the future of mankind.",
187
+ metadata={
188
+ "node_type": "n1",
189
+ }
190
+ ),
191
+ VectorNode(
192
+ workspace_id=workspace_id,
193
+ content="I want to eat fish!",
194
+ metadata={
195
+ "node_type": "n2",
196
+ }
197
+ ),
198
+ VectorNode(
199
+ workspace_id=workspace_id,
200
+ content="The bigger the storm, the more expensive the fish.",
201
+ metadata={
202
+ "node_type": "n1",
203
+ }
204
+ ),
205
+ ]
206
+
207
+ es.insert(sample_nodes, workspace_id=workspace_id, refresh=True)
208
+
209
+ logger.info("=" * 20)
210
+ results = es.add_term_filter(key="metadata.node_type", value="n1") \
211
+ .search("What is AI?", top_k=5, workspace_id=workspace_id)
212
+ for r in results:
213
+ logger.info(r.model_dump(exclude={"vector"}))
214
+ logger.info("=" * 20)
215
+
216
+ logger.info("=" * 20)
217
+ results = es.search("What is AI?", top_k=5, workspace_id=workspace_id)
218
+ for r in results:
219
+ logger.info(r.model_dump(exclude={"vector"}))
220
+ logger.info("=" * 20)
221
+ es.dump_workspace(workspace_id=workspace_id)
222
+ es.delete_workspace(workspace_id=workspace_id)
223
+
224
+
225
+ if __name__ == "__main__":
226
+ main()
227
+ # launch with: python -m llmflow.storage.es_vector_store