isage-middleware 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of isage-middleware might be problematic. Click here for more details.

Files changed (191) hide show
  1. isage_middleware-0.1.0.dist-info/METADATA +424 -0
  2. isage_middleware-0.1.0.dist-info/RECORD +191 -0
  3. isage_middleware-0.1.0.dist-info/WHEEL +5 -0
  4. isage_middleware-0.1.0.dist-info/top_level.txt +1 -0
  5. sage/__init__.py +2 -0
  6. sage/__pycache__/__init__.cpython-311.opt-2.pyc +0 -0
  7. sage/__pycache__/__init__.cpython-311.pyc +0 -0
  8. sage/middleware/__init__.py +83 -0
  9. sage/middleware/__pycache__/__init__.cpython-311.opt-2.pyc +0 -0
  10. sage/middleware/__pycache__/__init__.cpython-311.pyc +0 -0
  11. sage/middleware/api/__init__.py +22 -0
  12. sage/middleware/api/__pycache__/__init__.cpython-311.opt-2.pyc +0 -0
  13. sage/middleware/api/__pycache__/__init__.cpython-311.pyc +0 -0
  14. sage/middleware/api/__pycache__/graph_api.cpython-311.opt-2.pyc +0 -0
  15. sage/middleware/api/__pycache__/graph_api.cpython-311.pyc +0 -0
  16. sage/middleware/api/__pycache__/kv_api.cpython-311.opt-2.pyc +0 -0
  17. sage/middleware/api/__pycache__/kv_api.cpython-311.pyc +0 -0
  18. sage/middleware/api/__pycache__/memory_api.cpython-311.opt-2.pyc +0 -0
  19. sage/middleware/api/__pycache__/memory_api.cpython-311.pyc +0 -0
  20. sage/middleware/api/__pycache__/vdb_api.cpython-311.opt-2.pyc +0 -0
  21. sage/middleware/api/__pycache__/vdb_api.cpython-311.pyc +0 -0
  22. sage/middleware/api/graph_api.py +74 -0
  23. sage/middleware/api/kv_api.py +45 -0
  24. sage/middleware/api/memory_api.py +64 -0
  25. sage/middleware/api/vdb_api.py +60 -0
  26. sage/middleware/enterprise/__init__.py +75 -0
  27. sage/middleware/enterprise/__pycache__/__init__.cpython-311.opt-2.pyc +0 -0
  28. sage/middleware/enterprise/__pycache__/__init__.cpython-311.pyc +0 -0
  29. sage/middleware/enterprise/sage_db/__init__.py +132 -0
  30. sage/middleware/enterprise/sage_db/__pycache__/__init__.cpython-311.opt-2.pyc +0 -0
  31. sage/middleware/enterprise/sage_db/__pycache__/__init__.cpython-311.pyc +0 -0
  32. sage/middleware/enterprise/sage_db/__pycache__/sage_db.cpython-311.opt-2.pyc +0 -0
  33. sage/middleware/enterprise/sage_db/__pycache__/sage_db.cpython-311.pyc +0 -0
  34. sage/middleware/enterprise/sage_db/python/__init__.py +7 -0
  35. sage/middleware/enterprise/sage_db/python/__pycache__/__init__.cpython-311.opt-2.pyc +0 -0
  36. sage/middleware/enterprise/sage_db/python/__pycache__/__init__.cpython-311.pyc +0 -0
  37. sage/middleware/enterprise/sage_db/python/__pycache__/sage_db.cpython-311.opt-2.pyc +0 -0
  38. sage/middleware/enterprise/sage_db/python/__pycache__/sage_db.cpython-311.pyc +0 -0
  39. sage/middleware/enterprise/sage_db/python/sage_db.py +44 -0
  40. sage/middleware/enterprise/sage_db/sage_db.py +395 -0
  41. sage/middleware/enterprise/sage_db/tests/__pycache__/test_python.cpython-311.opt-2.pyc +0 -0
  42. sage/middleware/enterprise/sage_db/tests/__pycache__/test_python.cpython-311.pyc +0 -0
  43. sage/middleware/enterprise/sage_db/tests/test_python.py +144 -0
  44. sage/middleware/examples/__pycache__/api_usage_tutorial.cpython-311.opt-2.pyc +0 -0
  45. sage/middleware/examples/__pycache__/api_usage_tutorial.cpython-311.pyc +0 -0
  46. sage/middleware/examples/__pycache__/dag_microservices_demo.cpython-311.opt-2.pyc +0 -0
  47. sage/middleware/examples/__pycache__/dag_microservices_demo.cpython-311.pyc +0 -0
  48. sage/middleware/examples/__pycache__/microservices_demo.cpython-311.opt-2.pyc +0 -0
  49. sage/middleware/examples/__pycache__/microservices_demo.cpython-311.pyc +0 -0
  50. sage/middleware/examples/__pycache__/microservices_integration_demo.cpython-311.opt-2.pyc +0 -0
  51. sage/middleware/examples/__pycache__/microservices_integration_demo.cpython-311.pyc +0 -0
  52. sage/middleware/examples/__pycache__/microservices_registration_demo.cpython-311.opt-2.pyc +0 -0
  53. sage/middleware/examples/__pycache__/microservices_registration_demo.cpython-311.pyc +0 -0
  54. sage/middleware/examples/api_usage_tutorial.py +339 -0
  55. sage/middleware/examples/dag_microservices_demo.py +220 -0
  56. sage/middleware/examples/microservices_demo.py +0 -0
  57. sage/middleware/examples/microservices_integration_demo.py +373 -0
  58. sage/middleware/examples/microservices_registration_demo.py +144 -0
  59. sage/middleware/py.typed +2 -0
  60. sage/middleware/services/graph/__init__.py +8 -0
  61. sage/middleware/services/graph/__pycache__/__init__.cpython-311.opt-2.pyc +0 -0
  62. sage/middleware/services/graph/__pycache__/__init__.cpython-311.pyc +0 -0
  63. sage/middleware/services/graph/__pycache__/graph_index.cpython-311.opt-2.pyc +0 -0
  64. sage/middleware/services/graph/__pycache__/graph_index.cpython-311.pyc +0 -0
  65. sage/middleware/services/graph/__pycache__/graph_service.cpython-311.opt-2.pyc +0 -0
  66. sage/middleware/services/graph/__pycache__/graph_service.cpython-311.pyc +0 -0
  67. sage/middleware/services/graph/examples/__pycache__/graph_demo.cpython-311.opt-2.pyc +0 -0
  68. sage/middleware/services/graph/examples/__pycache__/graph_demo.cpython-311.pyc +0 -0
  69. sage/middleware/services/graph/examples/graph_demo.py +177 -0
  70. sage/middleware/services/graph/graph_index.py +194 -0
  71. sage/middleware/services/graph/graph_service.py +541 -0
  72. sage/middleware/services/graph/search_engine/__init__.py +0 -0
  73. sage/middleware/services/graph/search_engine/__pycache__/__init__.cpython-311.opt-2.pyc +0 -0
  74. sage/middleware/services/graph/search_engine/__pycache__/__init__.cpython-311.pyc +0 -0
  75. sage/middleware/services/graph/search_engine/__pycache__/base_graph_index.cpython-311.opt-2.pyc +0 -0
  76. sage/middleware/services/graph/search_engine/__pycache__/base_graph_index.cpython-311.pyc +0 -0
  77. sage/middleware/services/graph/search_engine/base_graph_index.py +0 -0
  78. sage/middleware/services/kv/__init__.py +8 -0
  79. sage/middleware/services/kv/__pycache__/__init__.cpython-311.opt-2.pyc +0 -0
  80. sage/middleware/services/kv/__pycache__/__init__.cpython-311.pyc +0 -0
  81. sage/middleware/services/kv/__pycache__/kv_service.cpython-311.opt-2.pyc +0 -0
  82. sage/middleware/services/kv/__pycache__/kv_service.cpython-311.pyc +0 -0
  83. sage/middleware/services/kv/examples/__pycache__/kv_demo.cpython-311.opt-2.pyc +0 -0
  84. sage/middleware/services/kv/examples/__pycache__/kv_demo.cpython-311.pyc +0 -0
  85. sage/middleware/services/kv/examples/kv_demo.py +213 -0
  86. sage/middleware/services/kv/kv_service.py +306 -0
  87. sage/middleware/services/kv/search_engine/__init__.py +0 -0
  88. sage/middleware/services/kv/search_engine/__pycache__/__init__.cpython-311.opt-2.pyc +0 -0
  89. sage/middleware/services/kv/search_engine/__pycache__/__init__.cpython-311.pyc +0 -0
  90. sage/middleware/services/kv/search_engine/__pycache__/base_kv_index.cpython-311.opt-2.pyc +0 -0
  91. sage/middleware/services/kv/search_engine/__pycache__/base_kv_index.cpython-311.pyc +0 -0
  92. sage/middleware/services/kv/search_engine/__pycache__/bm25s_index.cpython-311.opt-2.pyc +0 -0
  93. sage/middleware/services/kv/search_engine/__pycache__/bm25s_index.cpython-311.pyc +0 -0
  94. sage/middleware/services/kv/search_engine/base_kv_index.py +75 -0
  95. sage/middleware/services/kv/search_engine/bm25s_index.py +238 -0
  96. sage/middleware/services/memory/__init__.py +12 -0
  97. sage/middleware/services/memory/__pycache__/__init__.cpython-311.opt-2.pyc +0 -0
  98. sage/middleware/services/memory/__pycache__/__init__.cpython-311.pyc +0 -0
  99. sage/middleware/services/memory/__pycache__/memory_service.cpython-311.opt-2.pyc +0 -0
  100. sage/middleware/services/memory/__pycache__/memory_service.cpython-311.pyc +0 -0
  101. sage/middleware/services/memory/examples/__pycache__/dag_microservices_demo.cpython-311.opt-2.pyc +0 -0
  102. sage/middleware/services/memory/examples/__pycache__/dag_microservices_demo.cpython-311.pyc +0 -0
  103. sage/middleware/services/memory/examples/__pycache__/memory_demo.cpython-311.opt-2.pyc +0 -0
  104. sage/middleware/services/memory/examples/__pycache__/memory_demo.cpython-311.pyc +0 -0
  105. sage/middleware/services/memory/examples/dag_microservices_demo.py +220 -0
  106. sage/middleware/services/memory/examples/memory_demo.py +490 -0
  107. sage/middleware/services/memory/memory_collection/__pycache__/base_collection.cpython-311.opt-2.pyc +0 -0
  108. sage/middleware/services/memory/memory_collection/__pycache__/base_collection.cpython-311.pyc +0 -0
  109. sage/middleware/services/memory/memory_collection/__pycache__/graph_collection.cpython-311.opt-2.pyc +0 -0
  110. sage/middleware/services/memory/memory_collection/__pycache__/graph_collection.cpython-311.pyc +0 -0
  111. sage/middleware/services/memory/memory_collection/__pycache__/kv_collection.cpython-311.opt-2.pyc +0 -0
  112. sage/middleware/services/memory/memory_collection/__pycache__/kv_collection.cpython-311.pyc +0 -0
  113. sage/middleware/services/memory/memory_collection/__pycache__/vdb_collection.cpython-311.opt-2.pyc +0 -0
  114. sage/middleware/services/memory/memory_collection/__pycache__/vdb_collection.cpython-311.pyc +0 -0
  115. sage/middleware/services/memory/memory_collection/base_collection.py +0 -0
  116. sage/middleware/services/memory/memory_collection/graph_collection.py +0 -0
  117. sage/middleware/services/memory/memory_collection/kv_collection.py +0 -0
  118. sage/middleware/services/memory/memory_collection/vdb_collection.py +0 -0
  119. sage/middleware/services/memory/memory_service.py +474 -0
  120. sage/middleware/services/memory/utils/__init__.py +0 -0
  121. sage/middleware/services/memory/utils/__pycache__/__init__.cpython-311.opt-2.pyc +0 -0
  122. sage/middleware/services/memory/utils/__pycache__/__init__.cpython-311.pyc +0 -0
  123. sage/middleware/services/memory/utils/__pycache__/path_utils.cpython-311.opt-2.pyc +0 -0
  124. sage/middleware/services/memory/utils/__pycache__/path_utils.cpython-311.pyc +0 -0
  125. sage/middleware/services/memory/utils/path_utils.py +0 -0
  126. sage/middleware/services/vdb/__init__.py +8 -0
  127. sage/middleware/services/vdb/__pycache__/__init__.cpython-311.opt-2.pyc +0 -0
  128. sage/middleware/services/vdb/__pycache__/__init__.cpython-311.pyc +0 -0
  129. sage/middleware/services/vdb/__pycache__/vdb_service.cpython-311.opt-2.pyc +0 -0
  130. sage/middleware/services/vdb/__pycache__/vdb_service.cpython-311.pyc +0 -0
  131. sage/middleware/services/vdb/examples/__pycache__/vdb_demo.cpython-311.opt-2.pyc +0 -0
  132. sage/middleware/services/vdb/examples/__pycache__/vdb_demo.cpython-311.pyc +0 -0
  133. sage/middleware/services/vdb/examples/vdb_demo.py +447 -0
  134. sage/middleware/services/vdb/search_engine/__init__.py +0 -0
  135. sage/middleware/services/vdb/search_engine/__pycache__/__init__.cpython-311.opt-2.pyc +0 -0
  136. sage/middleware/services/vdb/search_engine/__pycache__/__init__.cpython-311.pyc +0 -0
  137. sage/middleware/services/vdb/search_engine/__pycache__/base_vdb_index.cpython-311.opt-2.pyc +0 -0
  138. sage/middleware/services/vdb/search_engine/__pycache__/base_vdb_index.cpython-311.pyc +0 -0
  139. sage/middleware/services/vdb/search_engine/__pycache__/faiss_index.cpython-311.opt-2.pyc +0 -0
  140. sage/middleware/services/vdb/search_engine/__pycache__/faiss_index.cpython-311.pyc +0 -0
  141. sage/middleware/services/vdb/search_engine/base_vdb_index.py +58 -0
  142. sage/middleware/services/vdb/search_engine/faiss_index.py +461 -0
  143. sage/middleware/services/vdb/vdb_service.py +433 -0
  144. sage/middleware/utils/__init__.py +5 -0
  145. sage/middleware/utils/__pycache__/__init__.cpython-311.opt-2.pyc +0 -0
  146. sage/middleware/utils/__pycache__/__init__.cpython-311.pyc +0 -0
  147. sage/middleware/utils/embedding/__init__.py +35 -0
  148. sage/middleware/utils/embedding/__pycache__/__init__.cpython-311.opt-2.pyc +0 -0
  149. sage/middleware/utils/embedding/__pycache__/__init__.cpython-311.pyc +0 -0
  150. sage/middleware/utils/embedding/__pycache__/_cohere.cpython-311.opt-2.pyc +0 -0
  151. sage/middleware/utils/embedding/__pycache__/_cohere.cpython-311.pyc +0 -0
  152. sage/middleware/utils/embedding/__pycache__/bedrock.cpython-311.opt-2.pyc +0 -0
  153. sage/middleware/utils/embedding/__pycache__/bedrock.cpython-311.pyc +0 -0
  154. sage/middleware/utils/embedding/__pycache__/embedding_api.cpython-311.opt-2.pyc +0 -0
  155. sage/middleware/utils/embedding/__pycache__/embedding_api.cpython-311.pyc +0 -0
  156. sage/middleware/utils/embedding/__pycache__/embedding_model.cpython-311.opt-2.pyc +0 -0
  157. sage/middleware/utils/embedding/__pycache__/embedding_model.cpython-311.pyc +0 -0
  158. sage/middleware/utils/embedding/__pycache__/hf.cpython-311.opt-2.pyc +0 -0
  159. sage/middleware/utils/embedding/__pycache__/hf.cpython-311.pyc +0 -0
  160. sage/middleware/utils/embedding/__pycache__/instructor.cpython-311.opt-2.pyc +0 -0
  161. sage/middleware/utils/embedding/__pycache__/instructor.cpython-311.pyc +0 -0
  162. sage/middleware/utils/embedding/__pycache__/jina.cpython-311.opt-2.pyc +0 -0
  163. sage/middleware/utils/embedding/__pycache__/jina.cpython-311.pyc +0 -0
  164. sage/middleware/utils/embedding/__pycache__/lollms.cpython-311.opt-2.pyc +0 -0
  165. sage/middleware/utils/embedding/__pycache__/lollms.cpython-311.pyc +0 -0
  166. sage/middleware/utils/embedding/__pycache__/mockembedder.cpython-311.opt-2.pyc +0 -0
  167. sage/middleware/utils/embedding/__pycache__/mockembedder.cpython-311.pyc +0 -0
  168. sage/middleware/utils/embedding/__pycache__/nvidia_openai.cpython-311.opt-2.pyc +0 -0
  169. sage/middleware/utils/embedding/__pycache__/nvidia_openai.cpython-311.pyc +0 -0
  170. sage/middleware/utils/embedding/__pycache__/ollama.cpython-311.opt-2.pyc +0 -0
  171. sage/middleware/utils/embedding/__pycache__/ollama.cpython-311.pyc +0 -0
  172. sage/middleware/utils/embedding/__pycache__/openai.cpython-311.opt-2.pyc +0 -0
  173. sage/middleware/utils/embedding/__pycache__/openai.cpython-311.pyc +0 -0
  174. sage/middleware/utils/embedding/__pycache__/siliconcloud.cpython-311.opt-2.pyc +0 -0
  175. sage/middleware/utils/embedding/__pycache__/siliconcloud.cpython-311.pyc +0 -0
  176. sage/middleware/utils/embedding/__pycache__/zhipu.cpython-311.opt-2.pyc +0 -0
  177. sage/middleware/utils/embedding/__pycache__/zhipu.cpython-311.pyc +0 -0
  178. sage/middleware/utils/embedding/_cohere.py +68 -0
  179. sage/middleware/utils/embedding/bedrock.py +174 -0
  180. sage/middleware/utils/embedding/embedding_api.py +12 -0
  181. sage/middleware/utils/embedding/embedding_model.py +150 -0
  182. sage/middleware/utils/embedding/hf.py +90 -0
  183. sage/middleware/utils/embedding/instructor.py +10 -0
  184. sage/middleware/utils/embedding/jina.py +115 -0
  185. sage/middleware/utils/embedding/lollms.py +100 -0
  186. sage/middleware/utils/embedding/mockembedder.py +46 -0
  187. sage/middleware/utils/embedding/nvidia_openai.py +97 -0
  188. sage/middleware/utils/embedding/ollama.py +97 -0
  189. sage/middleware/utils/embedding/openai.py +112 -0
  190. sage/middleware/utils/embedding/siliconcloud.py +133 -0
  191. sage/middleware/utils/embedding/zhipu.py +85 -0
@@ -0,0 +1,447 @@
1
+ """
2
+ VDB Service API 使用示例
3
+ 展示如何正确使用VDB微服务的API接口进行向量存储和相似性搜索
4
+ """
5
+ import numpy as np
6
+ from sage.core.api.local_environment import LocalEnvironment
7
+ from sage.middleware.services.services.vdb import create_vdb_service_factory
8
+ from sage.middleware.services.api.vdb_api import VDBServiceAPI
9
+
10
+
11
def test_vdb_service_api():
    """Register a FAISS-backed VDB service factory on a local environment.

    The environment is created and the factory registered, but the
    environment is never submitted and no service proxy is fetched; the
    function then hands over to ``demonstrate_vdb_api_usage`` for the
    printed API walkthrough.
    """
    for banner_line in ("🚀 VDB Service API Demo", "=" * 50):
        print(banner_line)

    service_name = "demo_vdb_service"
    local_env = LocalEnvironment("vdb_service_demo")

    # FAISS backend; IndexFlatL2 is the exact-search index.
    factory_settings = dict(
        service_name=service_name,
        embedding_dimension=384,
        index_type="IndexFlatL2",
        max_vectors=100000,
        similarity_threshold=0.8,
    )
    factory = create_vdb_service_factory(**factory_settings)
    local_env.register_service_factory(service_name, factory)

    summary_lines = (
        "✅ VDB Service registered with FAISS backend",
        " - Index: IndexFlatL2 (精确L2距离)",
        " - Dimension: 384",
        " - Max vectors: 100,000",
        " - Similarity threshold: 0.8",
    )
    for summary_line in summary_lines:
        print(summary_line)

    # In a real application the environment would be started and a service
    # proxy obtained, e.g.:
    #   local_env.submit()
    #   vdb_service = local_env.get_service_proxy("demo_vdb_service")

    # Here only the intended API usage is shown.
    demonstrate_vdb_api_usage()
41
+
42
+
43
def demonstrate_vdb_api_usage():
    """Print a walkthrough of the VDB service API.

    Three sections are written to stdout: the list of interface methods, a
    block of sample code, and a hard-coded table of expected results. The
    sample code is printed as text only; no service is contacted.
    """
    print("\n📝 VDB Service API Usage Patterns:")
    print("-" * 40)

    # Method signatures shown for the service interface.
    interface_lines = (
        "💡 VDB Service API Interface:",
        " class VDBServiceAPI:",
        " - add_vectors(documents: List[Dict]) -> List[str]",
        " - search(query_vector, top_k, threshold) -> List[Dict]",
        " - get_vector(doc_id: str) -> Optional[Dict]",
        " - delete_vectors(doc_ids: List[str]) -> bool",
        " - update_vector(doc_id: str, document: Dict) -> bool",
        " - count() -> int",
        " - save_index(path: str) -> bool",
        " - load_index(path: str) -> bool",
    )
    for interface_line in interface_lines:
        print(interface_line)

    print("\n📋 Standard Usage Example:")
    # Sample code for the reader; it is printed verbatim and never executed.
    sample_code = '''
# 1. 获取服务代理
vdb_service = env.get_service_proxy("demo_vdb_service")

# 2. 准备向量文档
documents = [
{
"id": "doc_001",
"vector": np.random.random(384).tolist(), # 384维向量
"text": "Python是一种高级编程语言",
"metadata": {
"category": "programming",
"language": "python",
"topic": "introduction"
}
},
{
"id": "doc_002",
"vector": np.random.random(384).tolist(),
"text": "机器学习是人工智能的一个分支",
"metadata": {
"category": "ai",
"topic": "machine_learning"
}
}
]

# 3. 添加向量到数据库
doc_ids = vdb_service.add_vectors(documents)
print(f"Added documents: {doc_ids}")

# 4. 向量相似性搜索
query_vector = np.random.random(384).tolist()
search_results = vdb_service.search(
query_vector=query_vector,
top_k=5,
similarity_threshold=0.8
)

# 5. 获取特定文档
document = vdb_service.get_vector("doc_001")

# 6. 更新文档
updated_doc = {
"id": "doc_001",
"vector": np.random.random(384).tolist(),
"text": "Python是一种强大的编程语言,广泛用于数据科学",
"metadata": {"category": "programming", "updated": True}
}
success = vdb_service.update_vector("doc_001", updated_doc)

# 7. 管理操作
total_count = vdb_service.count()
saved = vdb_service.save_index("/path/to/index")
'''
    print(sample_code)

    # Simulated outcome for each call in the sample above.
    print("🎯 Expected Results:")
    expected_outcomes = (
        ("add_vectors(documents)", "['doc_001', 'doc_002']"),
        ("search(query_vector, top_k=5)", "[{'id': 'doc_001', 'score': 0.92, ...}]"),
        ("get_vector('doc_001')", "{'id': 'doc_001', 'vector': [...], 'text': '...'}"),
        ("update_vector('doc_001', updated_doc)", "True"),
        ("count()", "2"),
        ("save_index('/path/to/index')", "True"),
    )
    for call_text, outcome in expected_outcomes:
        print(f" {call_text:<35} -> {outcome}")
131
+
132
+
133
def demonstrate_semantic_search_patterns():
    """Print sample code for three semantic-search usage patterns.

    The sample covers a document search engine that indexes title and
    content separately, a coarse-to-fine retrieval helper, and a batched
    "real-time" index. It is written to stdout as text only; none of it is
    executed here.

    The sample keeps normal class/method indentation so that the printed
    text is syntactically valid Python.
    """
    print("\n🔍 Semantic Search Patterns:")
    print("-" * 40)

    # The literal starts at column 0 on purpose: its contents are printed
    # verbatim, so indentation added here would appear in the output.
    search_patterns = '''
# 1. 多模态文档搜索
class DocumentSearchEngine:
    def __init__(self, vdb_service: VDBServiceAPI):
        self.vdb = vdb_service

    def index_document(self, doc_id: str, title: str, content: str,
                       title_embedding: List[float], content_embedding: List[float]):
        """索引文档的标题和内容"""
        # 索引标题
        title_doc = {
            "id": f"{doc_id}_title",
            "vector": title_embedding,
            "text": title,
            "metadata": {"type": "title", "parent_doc": doc_id}
        }

        # 索引内容
        content_doc = {
            "id": f"{doc_id}_content",
            "vector": content_embedding,
            "text": content,
            "metadata": {"type": "content", "parent_doc": doc_id}
        }

        return self.vdb.add_vectors([title_doc, content_doc])

    def semantic_search(self, query_embedding: List[float], doc_type=None):
        """语义搜索"""
        results = self.vdb.search(
            query_vector=query_embedding,
            top_k=20,
            similarity_threshold=0.7
        )

        # 按文档类型过滤
        if doc_type:
            results = [r for r in results if r["metadata"]["type"] == doc_type]

        return results

# 2. 分层检索
class HierarchicalRetrieval:
    def __init__(self, vdb_service: VDBServiceAPI):
        self.vdb = vdb_service

    def coarse_to_fine_search(self, query_vector: List[float]):
        """粗到细的检索策略"""
        # 第一阶段:粗粒度搜索(更多结果,较低阈值)
        coarse_results = self.vdb.search(
            query_vector=query_vector,
            top_k=100,
            similarity_threshold=0.6
        )

        # 第二阶段:细粒度重排序(基于更复杂的相似性计算)
        fine_results = self.rerank_results(query_vector, coarse_results)

        return fine_results[:10] # 返回top 10

    def rerank_results(self, query_vector, candidates):
        """重排序候选结果"""
        # 这里可以使用更复杂的相似性计算
        # 例如:考虑metadata权重、时间衰减等
        return sorted(candidates, key=lambda x: x["score"], reverse=True)

# 3. 实时更新索引
class RealTimeIndex:
    def __init__(self, vdb_service: VDBServiceAPI):
        self.vdb = vdb_service
        self.pending_updates = []

    def add_document_async(self, document: Dict):
        """异步添加文档"""
        self.pending_updates.append(('add', document))

        # 批量处理
        if len(self.pending_updates) >= 100:
            self.flush_updates()

    def flush_updates(self):
        """批量执行更新"""
        add_docs = [doc for action, doc in self.pending_updates if action == 'add']

        if add_docs:
            self.vdb.add_vectors(add_docs)

        self.pending_updates.clear()
'''
    print(search_patterns)
228
+
229
+
230
def demonstrate_vector_management():
    """Print vector-management sample code followed by a simulated operations log.

    Output, in order: sample classes for version management, index
    optimisation and monitoring (text only, never executed); a scripted log
    of VDB operations; and a short feature list. Five sample documents with
    random 384-element vectors are built locally to drive the first log line,
    but nothing is sent to a service and the remaining log lines are fixed
    text.

    The sample keeps normal class/method indentation so that the printed
    text is syntactically valid Python.
    """
    print("\n🗂️ Vector Management Best Practices:")
    print("-" * 40)

    # The literal starts at column 0 on purpose: its contents are printed
    # verbatim, so indentation added here would appear in the output.
    management_patterns = '''
# 1. 向量版本管理
class VectorVersionManager:
    def __init__(self, vdb_service: VDBServiceAPI):
        self.vdb = vdb_service

    def add_versioned_vector(self, base_id: str, vector: List[float],
                             text: str, version: int = 1):
        """添加带版本的向量"""
        doc_id = f"{base_id}_v{version}"
        document = {
            "id": doc_id,
            "vector": vector,
            "text": text,
            "metadata": {
                "base_id": base_id,
                "version": version,
                "is_latest": True
            }
        }

        # 将旧版本标记为非最新
        old_versions = self.get_all_versions(base_id)
        for old_doc in old_versions:
            old_doc["metadata"]["is_latest"] = False
            self.vdb.update_vector(old_doc["id"], old_doc)

        return self.vdb.add_vectors([document])

    def get_latest_version(self, base_id: str):
        """获取最新版本"""
        # 这需要结合metadata搜索功能
        pass

# 2. 索引优化
class IndexOptimizer:
    def __init__(self, vdb_service: VDBServiceAPI):
        self.vdb = vdb_service

    def optimize_index(self):
        """优化索引性能"""
        # 保存当前索引
        backup_path = f"/backup/index_{int(time.time())}"
        self.vdb.save_index(backup_path)

        # 重建索引(如果支持)
        # self.vdb.rebuild_index()

        print(f"Index optimized, backup saved to {backup_path}")

    def cleanup_old_vectors(self, retention_days: int = 30):
        """清理旧向量"""
        cutoff_time = time.time() - (retention_days * 24 * 3600)

        # 这需要结合timestamp metadata
        # old_docs = self.find_vectors_before(cutoff_time)
        # self.vdb.delete_vectors([doc["id"] for doc in old_docs])

# 3. 监控和度量
class VDBMonitor:
    def __init__(self, vdb_service: VDBServiceAPI):
        self.vdb = vdb_service

    def get_health_metrics(self):
        """获取健康度量"""
        return {
            "total_vectors": self.vdb.count(),
            "index_size": "计算索引大小",
            "average_query_time": "查询平均耗时",
            "memory_usage": "内存使用情况"
        }
'''
    print(management_patterns)

    # Simulated vector data.
    print("\n📝 VDB Operations Demo:")

    # Sample documents, each carrying a random 384-element vector.
    vectors = [
        {
            "id": f"doc_{i}",
            "vector": np.random.random(384).tolist(),
            "text": f"这是第{i}个文档的内容",
            "metadata": {
                "source": "demo",
                "type": "document",
                "index": i,
            },
        }
        for i in range(5)
    ]

    # Both counts come from the same list so the line cannot contradict itself.
    print(f" add_vectors({len(vectors)} docs) -> ✅ Added {len(vectors)} vectors")

    # Scripted search / maintenance log; these lines are fixed text, so no
    # query vector is generated for them.
    scripted_log = (
        " search_vectors(query, top_k=3) -> 📖 Found 3 similar documents",
        " - doc_2 (distance: 0.89)",
        " - doc_1 (distance: 0.91)",
        " - doc_4 (distance: 0.93)",
        " get_vector('doc_1') -> 📖 Retrieved document",
        " count() -> 📊 5 vectors",
        " delete_vectors(['doc_0']) -> 🗑️ Deleted 1 vector",
        " list_vectors(filter={'type': 'document'}) -> 📋 4 documents",
    )
    for log_line in scripted_log:
        print(log_line)

    print("\n💡 VDB Service Features:")
    feature_lines = (
        " - FAISS高性能向量检索",
        " - 多种索引类型 (Flat, HNSW, IVF, PQ)",
        " - 元数据过滤",
        " - 向量持久化",
        " - 相似度搜索",
    )
    for feature_line in feature_lines:
        print(feature_line)
348
+
349
+
350
def test_vdb_index_types():
    """Build a VDB service factory for each listed FAISS index type.

    For every entry a factory is created with a 384-dimension embedding and
    the entry's FAISS settings, then one summary line is printed. The
    factories are not registered with any environment.
    """
    print("\n🔧 FAISS Index Types:")

    # (index type, description, FAISS settings); tuple order fixes print order.
    catalogue = (
        ("IndexFlatL2", "精确L2距离搜索,适合小数据集", {}),
        (
            "IndexHNSWFlat",
            "HNSW图索引,快速近似搜索",
            {
                "HNSW_M": 32,
                "HNSW_EF_CONSTRUCTION": 200,
                "HNSW_EF_SEARCH": 50,
            },
        ),
        (
            "IndexIVFFlat",
            "IVF倒排索引,适合大数据集",
            {
                "IVF_NLIST": 100,
                "IVF_NPROBE": 10,
            },
        ),
        (
            "IndexIVFPQ",
            "IVF+PQ量化,内存高效",
            {
                "IVF_NLIST": 100,
                "IVF_NPROBE": 10,
                "PQ_M": 8,
                "PQ_NBITS": 8,
            },
        ),
    )

    for index_type, description, faiss_settings in catalogue:
        # Built only to exercise the configuration; the result is not used further.
        factory = create_vdb_service_factory(
            service_name=f"vdb_{index_type.lower()}",
            embedding_dimension=384,
            index_type=index_type,
            faiss_config=faiss_settings,
        )
        print(f"✅ {index_type}: {description}")
393
+
394
+
395
def test_vdb_applications():
    """Print four example application scenarios with their VDB configuration.

    Each scenario produces two lines: its name with a description, and the
    configuration dict rendered with its default ``repr``.
    """
    print("\n🎯 VDB Service Applications:")

    # (name, description, config); dict key order is kept because it shows in the output.
    scenarios = (
        (
            "语义搜索",
            "搜索语义相似的文档",
            {
                "embedding_dimension": 768,
                "index_type": "IndexHNSWFlat",
                "faiss_config": {"HNSW_M": 64},
            },
        ),
        (
            "推荐系统",
            "基于用户向量推荐相似物品",
            {
                "embedding_dimension": 256,
                "index_type": "IndexIVFPQ",
                "faiss_config": {"IVF_NLIST": 1000, "PQ_M": 16},
            },
        ),
        (
            "图像检索",
            "查找视觉相似的图像",
            {
                "embedding_dimension": 2048,
                "index_type": "IndexFlatL2",
            },
        ),
        (
            "知识库检索",
            "RAG应用中的知识检索",
            {
                "embedding_dimension": 384,
                "index_type": "IndexIVFFlat",
                "faiss_config": {"IVF_NLIST": 500},
            },
        ),
    )

    for name, description, config in scenarios:
        print(f" 📚 {name}: {description}")
        print(f" 配置: {config}")
440
+
441
+
442
if __name__ == "__main__":
    # Run the demo sections in order. test_vdb_index_types and
    # test_vdb_applications are not part of this default run.
    for demo_step in (
        test_vdb_service_api,
        demonstrate_semantic_search_patterns,
        demonstrate_vector_management,
    ):
        demo_step()

    for closing_line in (
        "\n🎯 VDB Service API demo completed!",
        "\n📚 Next: Check Memory service API examples",
    ):
        print(closing_line)
File without changes
@@ -0,0 +1,58 @@
1
+ # file sage/middleware/services/vdb/search_engine/base_vdb_index.py
2
+
3
+ from abc import ABC, abstractmethod
4
+ from typing import List, Tuple, Dict, Any
5
+ import numpy as np
6
+
7
class BaseVDBIndex(ABC):
    """Abstract interface for a named vector index of fixed dimension.

    The base class only stores the index name and vector dimension; every
    operation (insert, delete, update, search, load, store) is abstract and
    must be supplied by a concrete backend.
    """

    def __init__(self, name: str, dim: int):
        """Initialise the common index attributes.

        :param name: index name
        :param dim: vector dimension
        """
        self.name, self.dim = name, dim

    @abstractmethod
    def insert(self, vector: np.ndarray, string_id: str) -> None:
        """Insert a single vector under ``string_id``."""

    @abstractmethod
    def batch_insert(self, vectors: List[np.ndarray], string_ids: List[str]) -> None:
        """Insert several vectors in one call."""

    @abstractmethod
    def delete(self, string_id: str) -> None:
        """Delete one vector (physically or logically)."""

    @abstractmethod
    def update(self, string_id: str, new_vector: np.ndarray) -> None:
        """Replace the vector stored under ``string_id``."""

    @abstractmethod
    def search(self, query_vector: np.ndarray, topk: int = 10) -> Tuple[List[str], List[float]]:
        """Run a vector search and return the matching string ids with their distances."""

    @classmethod
    @abstractmethod
    def load(cls, name: str, root_path: str) -> "BaseVDBIndex":
        """Load an index instance."""

    @abstractmethod
    def store(self, root_path: str) -> Dict[str, Any]:
        """Store the index data to the specified directory."""
58
+