isage-middleware 0.1.1__py3-none-any.whl → 0.1.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of isage-middleware might be problematic. Click here for more details.

Files changed (379) hide show
  1. isage_middleware-0.1.3.dist-info/METADATA +115 -0
  2. isage_middleware-0.1.3.dist-info/RECORD +291 -0
  3. sage/__init__.py +56 -2
  4. sage/__pycache__/__init__.cpython-311.opt-2.pyc +0 -0
  5. sage/__pycache__/__init__.cpython-311.pyc +0 -0
  6. sage/middleware/__init__.py +52 -79
  7. sage/middleware/__pycache__/__init__.cpython-311.opt-2.pyc +0 -0
  8. sage/middleware/__pycache__/__init__.cpython-311.pyc +0 -0
  9. sage/middleware/__pycache__/_version.cpython-311.opt-2.pyc +0 -0
  10. sage/middleware/__pycache__/_version.cpython-311.pyc +0 -0
  11. sage/middleware/_version.py +35 -0
  12. sage/middleware/api/__init__.py +52 -18
  13. sage/middleware/api/__pycache__/__init__.cpython-311.opt-2.pyc +0 -0
  14. sage/middleware/api/__pycache__/__init__.cpython-311.pyc +0 -0
  15. sage/middleware/api/__pycache__/graph_api.cpython-311.opt-2.pyc +0 -0
  16. sage/middleware/api/__pycache__/graph_api.cpython-311.pyc +0 -0
  17. sage/middleware/api/__pycache__/kv_api.cpython-311.opt-2.pyc +0 -0
  18. sage/middleware/api/__pycache__/kv_api.cpython-311.pyc +0 -0
  19. sage/middleware/api/__pycache__/memory_api.cpython-311.opt-2.pyc +0 -0
  20. sage/middleware/api/__pycache__/memory_api.cpython-311.pyc +0 -0
  21. sage/middleware/api/__pycache__/vdb_api.cpython-311.opt-2.pyc +0 -0
  22. sage/middleware/api/__pycache__/vdb_api.cpython-311.pyc +0 -0
  23. sage/middleware/components/enterprise/__init__.py +56 -0
  24. sage/middleware/components/enterprise/__pycache__/__init__.cpython-311.opt-2.pyc +0 -0
  25. sage/middleware/components/enterprise/__pycache__/__init__.cpython-311.pyc +0 -0
  26. sage/middleware/components/neuromem/__init__.py +56 -0
  27. sage/middleware/components/neuromem/__pycache__/__init__.cpython-311.opt-2.pyc +0 -0
  28. sage/middleware/components/neuromem/__pycache__/__init__.cpython-311.pyc +0 -0
  29. sage/middleware/components/neuromem/__pycache__/memory_manager.cpython-311.opt-2.pyc +0 -0
  30. sage/middleware/components/neuromem/__pycache__/memory_manager.cpython-311.pyc +0 -0
  31. sage/middleware/components/neuromem/__pycache__/memory_service.cpython-311.opt-2.pyc +0 -0
  32. sage/middleware/components/neuromem/__pycache__/memory_service.cpython-311.pyc +0 -0
  33. sage/middleware/components/neuromem/memory_collection/__init__.py +56 -0
  34. sage/middleware/components/neuromem/memory_collection/__pycache__/__init__.cpython-311.opt-2.pyc +0 -0
  35. sage/middleware/components/neuromem/memory_collection/__pycache__/__init__.cpython-311.pyc +0 -0
  36. sage/middleware/components/neuromem/memory_collection/__pycache__/base_collection.cpython-311.opt-2.pyc +0 -0
  37. sage/middleware/components/neuromem/memory_collection/__pycache__/base_collection.cpython-311.pyc +0 -0
  38. sage/middleware/components/neuromem/memory_collection/__pycache__/graph_collection.cpython-311.opt-2.pyc +0 -0
  39. sage/middleware/components/neuromem/memory_collection/__pycache__/graph_collection.cpython-311.pyc +0 -0
  40. sage/middleware/components/neuromem/memory_collection/__pycache__/kv_collection.cpython-311.opt-2.pyc +0 -0
  41. sage/middleware/components/neuromem/memory_collection/__pycache__/kv_collection.cpython-311.pyc +0 -0
  42. sage/middleware/components/neuromem/memory_collection/__pycache__/vdb_collection.cpython-311.opt-2.pyc +0 -0
  43. sage/middleware/components/neuromem/memory_collection/__pycache__/vdb_collection.cpython-311.pyc +0 -0
  44. sage/middleware/components/neuromem/memory_collection/base_collection.py +167 -0
  45. sage/middleware/components/neuromem/memory_collection/graph_collection.py +11 -0
  46. sage/middleware/components/neuromem/memory_collection/kv_collection.py +709 -0
  47. sage/middleware/components/neuromem/memory_collection/vdb_collection.py +922 -0
  48. sage/middleware/components/neuromem/memory_manager.py +401 -0
  49. sage/middleware/components/neuromem/memory_service.py +324 -0
  50. sage/middleware/components/neuromem/micro_service/__init__.py +56 -0
  51. sage/middleware/components/neuromem/micro_service/__pycache__/__init__.cpython-311.opt-2.pyc +0 -0
  52. sage/middleware/components/neuromem/micro_service/__pycache__/__init__.cpython-311.pyc +0 -0
  53. sage/middleware/components/neuromem/micro_service/__pycache__/neuromem_vdb.cpython-311.opt-2.pyc +0 -0
  54. sage/middleware/components/neuromem/micro_service/__pycache__/neuromem_vdb.cpython-311.pyc +0 -0
  55. sage/middleware/components/neuromem/micro_service/__pycache__/neuromem_vdb_service.cpython-311.opt-2.pyc +0 -0
  56. sage/middleware/components/neuromem/micro_service/__pycache__/neuromem_vdb_service.cpython-311.pyc +0 -0
  57. sage/middleware/components/neuromem/micro_service/neuromem_vdb.py +198 -0
  58. sage/middleware/components/neuromem/micro_service/neuromem_vdb_service.py +118 -0
  59. sage/middleware/components/neuromem/search_engine/__init__.py +56 -0
  60. sage/middleware/components/neuromem/search_engine/__pycache__/__init__.cpython-311.opt-2.pyc +0 -0
  61. sage/middleware/components/neuromem/search_engine/__pycache__/__init__.cpython-311.pyc +0 -0
  62. sage/middleware/components/neuromem/search_engine/graph_index/__init__.py +56 -0
  63. sage/middleware/components/neuromem/search_engine/graph_index/__pycache__/__init__.cpython-311.opt-2.pyc +0 -0
  64. sage/middleware/components/neuromem/search_engine/graph_index/__pycache__/__init__.cpython-311.pyc +0 -0
  65. sage/middleware/components/neuromem/search_engine/graph_index/__pycache__/base_graph_index.cpython-311.opt-2.pyc +0 -0
  66. sage/middleware/components/neuromem/search_engine/graph_index/__pycache__/base_graph_index.cpython-311.pyc +0 -0
  67. sage/middleware/components/neuromem/search_engine/graph_index/base_graph_index.py +40 -0
  68. sage/middleware/components/neuromem/search_engine/hybird_index/__init__.py +56 -0
  69. sage/middleware/components/neuromem/search_engine/hybird_index/__pycache__/__init__.cpython-311.opt-2.pyc +0 -0
  70. sage/middleware/components/neuromem/search_engine/hybird_index/__pycache__/__init__.cpython-311.pyc +0 -0
  71. sage/middleware/components/neuromem/search_engine/kv_index/__init__.py +56 -0
  72. sage/middleware/components/neuromem/search_engine/kv_index/__pycache__/__init__.cpython-311.opt-2.pyc +0 -0
  73. sage/middleware/components/neuromem/search_engine/kv_index/__pycache__/__init__.cpython-311.pyc +0 -0
  74. sage/middleware/components/neuromem/search_engine/kv_index/__pycache__/base_kv_index.cpython-311.opt-2.pyc +0 -0
  75. sage/middleware/components/neuromem/search_engine/kv_index/__pycache__/base_kv_index.cpython-311.pyc +0 -0
  76. sage/middleware/components/neuromem/search_engine/kv_index/__pycache__/bm25s_index.cpython-311.opt-2.pyc +0 -0
  77. sage/middleware/components/neuromem/search_engine/kv_index/__pycache__/bm25s_index.cpython-311.pyc +0 -0
  78. sage/middleware/components/neuromem/search_engine/kv_index/base_kv_index.py +76 -0
  79. sage/middleware/components/neuromem/search_engine/kv_index/bm25s_index.py +320 -0
  80. sage/middleware/components/neuromem/search_engine/vdb_index/__init__.py +56 -0
  81. sage/middleware/components/neuromem/search_engine/vdb_index/__pycache__/__init__.cpython-311.opt-2.pyc +0 -0
  82. sage/middleware/components/neuromem/search_engine/vdb_index/__pycache__/__init__.cpython-311.pyc +0 -0
  83. sage/middleware/components/neuromem/search_engine/vdb_index/__pycache__/base_vdb_index.cpython-311.opt-2.pyc +0 -0
  84. sage/middleware/components/neuromem/search_engine/vdb_index/__pycache__/base_vdb_index.cpython-311.pyc +0 -0
  85. sage/middleware/components/neuromem/search_engine/vdb_index/__pycache__/faiss_index.cpython-311.opt-2.pyc +0 -0
  86. sage/middleware/components/neuromem/search_engine/vdb_index/__pycache__/faiss_index.cpython-311.pyc +0 -0
  87. sage/middleware/components/neuromem/search_engine/vdb_index/base_vdb_index.py +53 -0
  88. sage/middleware/components/neuromem/search_engine/vdb_index/faiss_index.py +700 -0
  89. sage/middleware/components/neuromem/storage_engine/__init__.py +56 -0
  90. sage/middleware/components/neuromem/storage_engine/__pycache__/__init__.cpython-311.opt-2.pyc +0 -0
  91. sage/middleware/components/neuromem/storage_engine/__pycache__/__init__.cpython-311.pyc +0 -0
  92. sage/middleware/components/neuromem/storage_engine/__pycache__/metadata_storage.cpython-311.opt-2.pyc +0 -0
  93. sage/middleware/components/neuromem/storage_engine/__pycache__/metadata_storage.cpython-311.pyc +0 -0
  94. sage/middleware/components/neuromem/storage_engine/__pycache__/text_storage.cpython-311.opt-2.pyc +0 -0
  95. sage/middleware/components/neuromem/storage_engine/__pycache__/text_storage.cpython-311.pyc +0 -0
  96. sage/middleware/components/neuromem/storage_engine/__pycache__/vector_storage.cpython-311.opt-2.pyc +0 -0
  97. sage/middleware/components/neuromem/storage_engine/__pycache__/vector_storage.cpython-311.pyc +0 -0
  98. sage/middleware/components/neuromem/storage_engine/kv_backend/__init__.py +56 -0
  99. sage/middleware/components/neuromem/storage_engine/kv_backend/__pycache__/__init__.cpython-311.opt-2.pyc +0 -0
  100. sage/middleware/components/neuromem/storage_engine/kv_backend/__pycache__/__init__.cpython-311.pyc +0 -0
  101. sage/middleware/components/neuromem/storage_engine/kv_backend/__pycache__/base_kv_backend.cpython-311.opt-2.pyc +0 -0
  102. sage/middleware/components/neuromem/storage_engine/kv_backend/__pycache__/base_kv_backend.cpython-311.pyc +0 -0
  103. sage/middleware/components/neuromem/storage_engine/kv_backend/__pycache__/dict_kv_backend.cpython-311.opt-2.pyc +0 -0
  104. sage/middleware/components/neuromem/storage_engine/kv_backend/__pycache__/dict_kv_backend.cpython-311.pyc +0 -0
  105. sage/middleware/components/neuromem/storage_engine/kv_backend/base_kv_backend.py +65 -0
  106. sage/middleware/components/neuromem/storage_engine/kv_backend/dict_kv_backend.py +54 -0
  107. sage/middleware/components/neuromem/storage_engine/metadata_storage.py +260 -0
  108. sage/middleware/components/neuromem/storage_engine/text_storage.py +106 -0
  109. sage/middleware/components/neuromem/storage_engine/vector_storage.py +85 -0
  110. sage/middleware/components/neuromem/tests/__init__.py +56 -0
  111. sage/middleware/components/neuromem/tests/__pycache__/__init__.cpython-311.opt-2.pyc +0 -0
  112. sage/middleware/components/neuromem/tests/__pycache__/__init__.cpython-311.pyc +0 -0
  113. sage/middleware/components/neuromem/tests/__pycache__/test_memory_service.cpython-311.opt-2.pyc +0 -0
  114. sage/middleware/components/neuromem/tests/__pycache__/test_memory_service.cpython-311.pyc +0 -0
  115. sage/middleware/components/neuromem/tests/core_test/__init__.py +56 -0
  116. sage/middleware/components/neuromem/tests/core_test/__pycache__/__init__.cpython-311.opt-2.pyc +0 -0
  117. sage/middleware/components/neuromem/tests/core_test/__pycache__/__init__.cpython-311.pyc +0 -0
  118. sage/middleware/components/neuromem/tests/core_test/collection_test/__init__.py +56 -0
  119. sage/middleware/components/neuromem/tests/core_test/collection_test/__pycache__/__init__.cpython-311.opt-2.pyc +0 -0
  120. sage/middleware/components/neuromem/tests/core_test/collection_test/__pycache__/__init__.cpython-311.pyc +0 -0
  121. sage/middleware/components/neuromem/tests/core_test/collection_test/__pycache__/kv_collection_test.cpython-311.opt-2.pyc +0 -0
  122. sage/middleware/components/neuromem/tests/core_test/collection_test/__pycache__/kv_collection_test.cpython-311.pyc +0 -0
  123. sage/middleware/components/neuromem/tests/core_test/collection_test/__pycache__/vdb_collection_test.cpython-311.opt-2.pyc +0 -0
  124. sage/middleware/components/neuromem/tests/core_test/collection_test/__pycache__/vdb_collection_test.cpython-311.pyc +0 -0
  125. sage/middleware/components/neuromem/tests/core_test/collection_test/kv_collection_test.py +60 -0
  126. sage/middleware/components/neuromem/tests/core_test/collection_test/vdb_collection_test.py +88 -0
  127. sage/middleware/components/neuromem/tests/core_test/manager_test.py +154 -0
  128. sage/middleware/components/neuromem/tests/test_memory_service.py +293 -0
  129. sage/middleware/components/neuromem/utils/__init__.py +56 -0
  130. sage/middleware/components/neuromem/utils/__pycache__/__init__.cpython-311.opt-2.pyc +0 -0
  131. sage/middleware/components/neuromem/utils/__pycache__/__init__.cpython-311.pyc +0 -0
  132. sage/middleware/components/neuromem/utils/__pycache__/path_utils.cpython-311.opt-2.pyc +0 -0
  133. sage/middleware/components/neuromem/utils/__pycache__/path_utils.cpython-311.pyc +0 -0
  134. sage/middleware/components/neuromem/utils/path_utils.py +25 -0
  135. sage/middleware/components/sage_db/__init__.py +56 -0
  136. sage/middleware/components/sage_db/__pycache__/__init__.cpython-311.opt-2.pyc +0 -0
  137. sage/middleware/components/sage_db/__pycache__/__init__.cpython-311.pyc +0 -0
  138. sage/middleware/components/sage_db/__pycache__/sage_db.cpython-311.opt-2.pyc +0 -0
  139. sage/middleware/components/sage_db/__pycache__/sage_db.cpython-311.pyc +0 -0
  140. sage/middleware/components/sage_db/python/__init__.py +56 -0
  141. sage/middleware/components/sage_db/python/__pycache__/__init__.cpython-311.opt-2.pyc +0 -0
  142. sage/middleware/components/sage_db/python/__pycache__/__init__.cpython-311.pyc +0 -0
  143. sage/middleware/components/sage_db/python/__pycache__/sage_db.cpython-311.opt-2.pyc +0 -0
  144. sage/middleware/components/sage_db/python/__pycache__/sage_db.cpython-311.pyc +0 -0
  145. sage/middleware/components/sage_db/tests/__pycache__/test_python.cpython-311.opt-2.pyc +0 -0
  146. sage/middleware/components/sage_db/tests/__pycache__/test_python.cpython-311.pyc +0 -0
  147. sage/middleware/examples/__pycache__/api_usage_tutorial.cpython-311.opt-2.pyc +0 -0
  148. sage/middleware/examples/__pycache__/api_usage_tutorial.cpython-311.pyc +0 -0
  149. sage/middleware/examples/__pycache__/microservices_demo.cpython-311.opt-2.pyc +0 -0
  150. sage/middleware/examples/__pycache__/microservices_demo.cpython-311.pyc +0 -0
  151. sage/middleware/examples/__pycache__/microservices_registration_demo.cpython-311.opt-2.pyc +0 -0
  152. sage/middleware/examples/__pycache__/microservices_registration_demo.cpython-311.pyc +0 -0
  153. sage/middleware/examples/api_usage_tutorial.py +3 -3
  154. sage/middleware/examples/dag_microservices_demo.py +7 -8
  155. sage/middleware/examples/microservices_integration_demo.py +8 -11
  156. sage/middleware/examples/microservices_registration_demo.py +8 -12
  157. sage/middleware/services/__init__.py +56 -0
  158. sage/middleware/services/__pycache__/__init__.cpython-311.opt-2.pyc +0 -0
  159. sage/middleware/services/__pycache__/__init__.cpython-311.pyc +0 -0
  160. sage/middleware/services/graph/__init__.py +52 -4
  161. sage/middleware/services/graph/__pycache__/__init__.cpython-311.opt-2.pyc +0 -0
  162. sage/middleware/services/graph/__pycache__/__init__.cpython-311.pyc +0 -0
  163. sage/middleware/services/graph/__pycache__/graph_index.cpython-311.opt-2.pyc +0 -0
  164. sage/middleware/services/graph/__pycache__/graph_index.cpython-311.pyc +0 -0
  165. sage/middleware/services/graph/__pycache__/graph_service.cpython-311.opt-2.pyc +0 -0
  166. sage/middleware/services/graph/__pycache__/graph_service.cpython-311.pyc +0 -0
  167. sage/middleware/services/graph/examples/__pycache__/graph_demo.cpython-311.opt-2.pyc +0 -0
  168. sage/middleware/services/graph/examples/__pycache__/graph_demo.cpython-311.pyc +0 -0
  169. sage/middleware/services/graph/examples/graph_demo.py +3 -2
  170. sage/middleware/services/graph/graph_service.py +68 -0
  171. sage/middleware/services/graph/search_engine/__init__.py +56 -0
  172. sage/middleware/services/graph/search_engine/__pycache__/__init__.cpython-311.opt-2.pyc +0 -0
  173. sage/middleware/services/graph/search_engine/__pycache__/__init__.cpython-311.pyc +0 -0
  174. sage/middleware/services/graph/search_engine/__pycache__/base_graph_index.cpython-311.opt-2.pyc +0 -0
  175. sage/middleware/services/graph/search_engine/__pycache__/base_graph_index.cpython-311.pyc +0 -0
  176. sage/middleware/services/kv/__init__.py +52 -4
  177. sage/middleware/services/kv/__pycache__/__init__.cpython-311.opt-2.pyc +0 -0
  178. sage/middleware/services/kv/__pycache__/__init__.cpython-311.pyc +0 -0
  179. sage/middleware/services/kv/__pycache__/kv_service.cpython-311.opt-2.pyc +0 -0
  180. sage/middleware/services/kv/__pycache__/kv_service.cpython-311.pyc +0 -0
  181. sage/middleware/services/kv/examples/__pycache__/{kv_demo.cpython-313.opt-2.pyc → kv_demo.cpython-311.opt-2.pyc} +0 -0
  182. sage/middleware/services/kv/examples/__pycache__/{kv_demo.cpython-313.pyc → kv_demo.cpython-311.pyc} +0 -0
  183. sage/middleware/services/kv/examples/kv_demo.py +1 -1
  184. sage/middleware/services/kv/search_engine/__init__.py +56 -0
  185. sage/middleware/services/kv/search_engine/__pycache__/__init__.cpython-311.opt-2.pyc +0 -0
  186. sage/middleware/services/kv/search_engine/__pycache__/__init__.cpython-311.pyc +0 -0
  187. sage/middleware/services/kv/search_engine/__pycache__/base_kv_index.cpython-311.opt-2.pyc +0 -0
  188. sage/middleware/services/kv/search_engine/__pycache__/base_kv_index.cpython-311.pyc +0 -0
  189. sage/middleware/services/kv/search_engine/__pycache__/bm25s_index.cpython-311.opt-2.pyc +0 -0
  190. sage/middleware/services/kv/search_engine/__pycache__/bm25s_index.cpython-311.pyc +0 -0
  191. sage/middleware/services/memory/__init__.py +52 -8
  192. sage/middleware/services/memory/__pycache__/__init__.cpython-311.opt-2.pyc +0 -0
  193. sage/middleware/services/memory/__pycache__/__init__.cpython-311.pyc +0 -0
  194. sage/middleware/services/memory/__pycache__/memory_service.cpython-311.opt-2.pyc +0 -0
  195. sage/middleware/services/memory/__pycache__/memory_service.cpython-311.pyc +0 -0
  196. sage/middleware/services/memory/examples/__pycache__/{memory_demo.cpython-313.opt-2.pyc → memory_demo.cpython-311.opt-2.pyc} +0 -0
  197. sage/middleware/services/memory/examples/__pycache__/{memory_demo.cpython-313.pyc → memory_demo.cpython-311.pyc} +0 -0
  198. sage/middleware/services/memory/examples/dag_microservices_demo.py +8 -9
  199. sage/middleware/services/memory/examples/memory_demo.py +4 -4
  200. sage/middleware/services/memory/memory_collection/__pycache__/graph_collection.cpython-311.opt-2.pyc +0 -0
  201. sage/middleware/services/memory/memory_collection/__pycache__/graph_collection.cpython-311.pyc +0 -0
  202. sage/middleware/services/memory/memory_service.py +14 -11
  203. sage/middleware/services/memory/utils/__init__.py +56 -0
  204. sage/middleware/services/memory/utils/__pycache__/__init__.cpython-311.opt-2.pyc +0 -0
  205. sage/middleware/services/memory/utils/__pycache__/__init__.cpython-311.pyc +0 -0
  206. sage/middleware/services/memory/utils/__pycache__/path_utils.cpython-311.opt-2.pyc +0 -0
  207. sage/middleware/services/memory/utils/__pycache__/path_utils.cpython-311.pyc +0 -0
  208. sage/middleware/services/vdb/__init__.py +52 -4
  209. sage/middleware/services/vdb/__pycache__/__init__.cpython-311.opt-2.pyc +0 -0
  210. sage/middleware/services/vdb/__pycache__/__init__.cpython-311.pyc +0 -0
  211. sage/middleware/services/vdb/__pycache__/vdb_service.cpython-311.opt-2.pyc +0 -0
  212. sage/middleware/services/vdb/__pycache__/vdb_service.cpython-311.pyc +0 -0
  213. sage/middleware/services/vdb/examples/__pycache__/{vdb_demo.cpython-313.opt-2.pyc → vdb_demo.cpython-311.opt-2.pyc} +0 -0
  214. sage/middleware/services/vdb/examples/__pycache__/{vdb_demo.cpython-313.pyc → vdb_demo.cpython-311.pyc} +0 -0
  215. sage/middleware/services/vdb/examples/vdb_demo.py +2 -2
  216. sage/middleware/services/vdb/search_engine/__init__.py +56 -0
  217. sage/middleware/services/vdb/search_engine/__pycache__/__init__.cpython-311.opt-2.pyc +0 -0
  218. sage/middleware/services/vdb/search_engine/__pycache__/__init__.cpython-311.pyc +0 -0
  219. sage/middleware/services/vdb/search_engine/__pycache__/base_vdb_index.cpython-311.opt-2.pyc +0 -0
  220. sage/middleware/services/vdb/search_engine/__pycache__/base_vdb_index.cpython-311.pyc +0 -0
  221. sage/middleware/services/vdb/search_engine/__pycache__/faiss_index.cpython-311.opt-2.pyc +0 -0
  222. sage/middleware/services/vdb/search_engine/__pycache__/faiss_index.cpython-311.pyc +0 -0
  223. sage/middleware/services/vdb/vdb_service.py +44 -41
  224. sage/middleware/utils/__init__.py +53 -2
  225. sage/middleware/utils/__pycache__/__init__.cpython-311.opt-2.pyc +0 -0
  226. sage/middleware/utils/__pycache__/__init__.cpython-311.pyc +0 -0
  227. sage/middleware/utils/embedding/__init__.py +52 -31
  228. sage/middleware/utils/embedding/__pycache__/__init__.cpython-311.opt-2.pyc +0 -0
  229. sage/middleware/utils/embedding/__pycache__/__init__.cpython-311.pyc +0 -0
  230. sage/middleware/utils/embedding/__pycache__/_cohere.cpython-311.opt-2.pyc +0 -0
  231. sage/middleware/utils/embedding/__pycache__/_cohere.cpython-311.pyc +0 -0
  232. sage/middleware/utils/embedding/__pycache__/bedrock.cpython-311.opt-2.pyc +0 -0
  233. sage/middleware/utils/embedding/__pycache__/bedrock.cpython-311.pyc +0 -0
  234. sage/middleware/utils/embedding/__pycache__/embedding_api.cpython-311.opt-2.pyc +0 -0
  235. sage/middleware/utils/embedding/__pycache__/embedding_api.cpython-311.pyc +0 -0
  236. sage/middleware/utils/embedding/__pycache__/embedding_model.cpython-311.opt-2.pyc +0 -0
  237. sage/middleware/utils/embedding/__pycache__/embedding_model.cpython-311.pyc +0 -0
  238. sage/middleware/utils/embedding/__pycache__/hf.cpython-311.opt-2.pyc +0 -0
  239. sage/middleware/utils/embedding/__pycache__/hf.cpython-311.pyc +0 -0
  240. sage/middleware/utils/embedding/__pycache__/instructor.cpython-311.opt-2.pyc +0 -0
  241. sage/middleware/utils/embedding/__pycache__/instructor.cpython-311.pyc +0 -0
  242. sage/middleware/utils/embedding/__pycache__/jina.cpython-311.opt-2.pyc +0 -0
  243. sage/middleware/utils/embedding/__pycache__/jina.cpython-311.pyc +0 -0
  244. sage/middleware/utils/embedding/__pycache__/lollms.cpython-311.opt-2.pyc +0 -0
  245. sage/middleware/utils/embedding/__pycache__/lollms.cpython-311.pyc +0 -0
  246. sage/middleware/utils/embedding/__pycache__/mockembedder.cpython-311.opt-2.pyc +0 -0
  247. sage/middleware/utils/embedding/__pycache__/mockembedder.cpython-311.pyc +0 -0
  248. sage/middleware/utils/embedding/__pycache__/nvidia_openai.cpython-311.opt-2.pyc +0 -0
  249. sage/middleware/utils/embedding/__pycache__/nvidia_openai.cpython-311.pyc +0 -0
  250. sage/middleware/utils/embedding/__pycache__/ollama.cpython-311.opt-2.pyc +0 -0
  251. sage/middleware/utils/embedding/__pycache__/ollama.cpython-311.pyc +0 -0
  252. sage/middleware/utils/embedding/__pycache__/openai.cpython-311.opt-2.pyc +0 -0
  253. sage/middleware/utils/embedding/__pycache__/openai.cpython-311.pyc +0 -0
  254. sage/middleware/utils/embedding/__pycache__/siliconcloud.cpython-311.opt-2.pyc +0 -0
  255. sage/middleware/utils/embedding/__pycache__/siliconcloud.cpython-311.pyc +0 -0
  256. sage/middleware/utils/embedding/__pycache__/zhipu.cpython-311.opt-2.pyc +0 -0
  257. sage/middleware/utils/embedding/__pycache__/zhipu.cpython-311.pyc +0 -0
  258. isage_middleware-0.1.1.dist-info/METADATA +0 -424
  259. isage_middleware-0.1.1.dist-info/RECORD +0 -182
  260. sage/__pycache__/__init__.cpython-313.opt-2.pyc +0 -0
  261. sage/__pycache__/__init__.cpython-313.pyc +0 -0
  262. sage/middleware/__pycache__/__init__.cpython-313.opt-2.pyc +0 -0
  263. sage/middleware/__pycache__/__init__.cpython-313.pyc +0 -0
  264. sage/middleware/api/__pycache__/__init__.cpython-313.opt-2.pyc +0 -0
  265. sage/middleware/api/__pycache__/__init__.cpython-313.pyc +0 -0
  266. sage/middleware/api/__pycache__/graph_api.cpython-313.opt-2.pyc +0 -0
  267. sage/middleware/api/__pycache__/graph_api.cpython-313.pyc +0 -0
  268. sage/middleware/api/__pycache__/kv_api.cpython-313.opt-2.pyc +0 -0
  269. sage/middleware/api/__pycache__/kv_api.cpython-313.pyc +0 -0
  270. sage/middleware/api/__pycache__/memory_api.cpython-313.opt-2.pyc +0 -0
  271. sage/middleware/api/__pycache__/memory_api.cpython-313.pyc +0 -0
  272. sage/middleware/api/__pycache__/vdb_api.cpython-313.opt-2.pyc +0 -0
  273. sage/middleware/api/__pycache__/vdb_api.cpython-313.pyc +0 -0
  274. sage/middleware/enterprise/__init__.py +0 -75
  275. sage/middleware/enterprise/__pycache__/__init__.cpython-313.opt-2.pyc +0 -0
  276. sage/middleware/enterprise/__pycache__/__init__.cpython-313.pyc +0 -0
  277. sage/middleware/enterprise/sage_db/__init__.py +0 -132
  278. sage/middleware/enterprise/sage_db/__pycache__/__init__.cpython-313.opt-2.pyc +0 -0
  279. sage/middleware/enterprise/sage_db/__pycache__/__init__.cpython-313.pyc +0 -0
  280. sage/middleware/enterprise/sage_db/__pycache__/sage_db.cpython-313.opt-2.pyc +0 -0
  281. sage/middleware/enterprise/sage_db/__pycache__/sage_db.cpython-313.pyc +0 -0
  282. sage/middleware/enterprise/sage_db/python/__init__.py +0 -7
  283. sage/middleware/enterprise/sage_db/python/__pycache__/__init__.cpython-313.opt-2.pyc +0 -0
  284. sage/middleware/enterprise/sage_db/python/__pycache__/__init__.cpython-313.pyc +0 -0
  285. sage/middleware/enterprise/sage_db/python/__pycache__/sage_db.cpython-313.opt-2.pyc +0 -0
  286. sage/middleware/enterprise/sage_db/python/__pycache__/sage_db.cpython-313.pyc +0 -0
  287. sage/middleware/enterprise/sage_db/tests/__pycache__/test_python.cpython-313.opt-2.pyc +0 -0
  288. sage/middleware/enterprise/sage_db/tests/__pycache__/test_python.cpython-313.pyc +0 -0
  289. sage/middleware/examples/__pycache__/api_usage_tutorial.cpython-313.opt-2.pyc +0 -0
  290. sage/middleware/examples/__pycache__/api_usage_tutorial.cpython-313.pyc +0 -0
  291. sage/middleware/examples/__pycache__/dag_microservices_demo.cpython-313.opt-2.pyc +0 -0
  292. sage/middleware/examples/__pycache__/dag_microservices_demo.cpython-313.pyc +0 -0
  293. sage/middleware/examples/__pycache__/microservices_demo.cpython-313.opt-2.pyc +0 -0
  294. sage/middleware/examples/__pycache__/microservices_demo.cpython-313.pyc +0 -0
  295. sage/middleware/examples/__pycache__/microservices_integration_demo.cpython-313.opt-2.pyc +0 -0
  296. sage/middleware/examples/__pycache__/microservices_integration_demo.cpython-313.pyc +0 -0
  297. sage/middleware/examples/__pycache__/microservices_registration_demo.cpython-313.opt-2.pyc +0 -0
  298. sage/middleware/examples/__pycache__/microservices_registration_demo.cpython-313.pyc +0 -0
  299. sage/middleware/services/graph/__pycache__/__init__.cpython-313.opt-2.pyc +0 -0
  300. sage/middleware/services/graph/__pycache__/__init__.cpython-313.pyc +0 -0
  301. sage/middleware/services/graph/__pycache__/graph_index.cpython-313.opt-2.pyc +0 -0
  302. sage/middleware/services/graph/__pycache__/graph_index.cpython-313.pyc +0 -0
  303. sage/middleware/services/graph/__pycache__/graph_service.cpython-313.opt-2.pyc +0 -0
  304. sage/middleware/services/graph/__pycache__/graph_service.cpython-313.pyc +0 -0
  305. sage/middleware/services/graph/examples/__pycache__/graph_demo.cpython-313.opt-2.pyc +0 -0
  306. sage/middleware/services/graph/examples/__pycache__/graph_demo.cpython-313.pyc +0 -0
  307. sage/middleware/services/graph/search_engine/__pycache__/__init__.cpython-313.opt-2.pyc +0 -0
  308. sage/middleware/services/graph/search_engine/__pycache__/__init__.cpython-313.pyc +0 -0
  309. sage/middleware/services/graph/search_engine/__pycache__/base_graph_index.cpython-313.opt-2.pyc +0 -0
  310. sage/middleware/services/graph/search_engine/__pycache__/base_graph_index.cpython-313.pyc +0 -0
  311. sage/middleware/services/kv/__pycache__/__init__.cpython-313.opt-2.pyc +0 -0
  312. sage/middleware/services/kv/__pycache__/__init__.cpython-313.pyc +0 -0
  313. sage/middleware/services/kv/__pycache__/kv_service.cpython-313.opt-2.pyc +0 -0
  314. sage/middleware/services/kv/__pycache__/kv_service.cpython-313.pyc +0 -0
  315. sage/middleware/services/kv/search_engine/__pycache__/__init__.cpython-313.opt-2.pyc +0 -0
  316. sage/middleware/services/kv/search_engine/__pycache__/__init__.cpython-313.pyc +0 -0
  317. sage/middleware/services/kv/search_engine/__pycache__/base_kv_index.cpython-313.opt-2.pyc +0 -0
  318. sage/middleware/services/kv/search_engine/__pycache__/base_kv_index.cpython-313.pyc +0 -0
  319. sage/middleware/services/kv/search_engine/__pycache__/bm25s_index.cpython-313.opt-2.pyc +0 -0
  320. sage/middleware/services/kv/search_engine/__pycache__/bm25s_index.cpython-313.pyc +0 -0
  321. sage/middleware/services/memory/__pycache__/__init__.cpython-313.opt-2.pyc +0 -0
  322. sage/middleware/services/memory/__pycache__/__init__.cpython-313.pyc +0 -0
  323. sage/middleware/services/memory/__pycache__/memory_service.cpython-313.opt-2.pyc +0 -0
  324. sage/middleware/services/memory/__pycache__/memory_service.cpython-313.pyc +0 -0
  325. sage/middleware/services/memory/examples/__pycache__/dag_microservices_demo.cpython-313.opt-2.pyc +0 -0
  326. sage/middleware/services/memory/examples/__pycache__/dag_microservices_demo.cpython-313.pyc +0 -0
  327. sage/middleware/services/memory/memory_collection/__pycache__/graph_collection.cpython-313.opt-2.pyc +0 -0
  328. sage/middleware/services/memory/memory_collection/__pycache__/graph_collection.cpython-313.pyc +0 -0
  329. sage/middleware/services/memory/utils/__pycache__/__init__.cpython-313.opt-2.pyc +0 -0
  330. sage/middleware/services/memory/utils/__pycache__/__init__.cpython-313.pyc +0 -0
  331. sage/middleware/services/memory/utils/__pycache__/path_utils.cpython-313.opt-2.pyc +0 -0
  332. sage/middleware/services/memory/utils/__pycache__/path_utils.cpython-313.pyc +0 -0
  333. sage/middleware/services/vdb/__pycache__/__init__.cpython-313.opt-2.pyc +0 -0
  334. sage/middleware/services/vdb/__pycache__/__init__.cpython-313.pyc +0 -0
  335. sage/middleware/services/vdb/__pycache__/vdb_service.cpython-313.opt-2.pyc +0 -0
  336. sage/middleware/services/vdb/__pycache__/vdb_service.cpython-313.pyc +0 -0
  337. sage/middleware/services/vdb/search_engine/__pycache__/__init__.cpython-313.opt-2.pyc +0 -0
  338. sage/middleware/services/vdb/search_engine/__pycache__/__init__.cpython-313.pyc +0 -0
  339. sage/middleware/services/vdb/search_engine/__pycache__/base_vdb_index.cpython-313.opt-2.pyc +0 -0
  340. sage/middleware/services/vdb/search_engine/__pycache__/base_vdb_index.cpython-313.pyc +0 -0
  341. sage/middleware/services/vdb/search_engine/__pycache__/faiss_index.cpython-313.opt-2.pyc +0 -0
  342. sage/middleware/services/vdb/search_engine/__pycache__/faiss_index.cpython-313.pyc +0 -0
  343. sage/middleware/utils/__pycache__/__init__.cpython-313.opt-2.pyc +0 -0
  344. sage/middleware/utils/__pycache__/__init__.cpython-313.pyc +0 -0
  345. sage/middleware/utils/embedding/__pycache__/__init__.cpython-313.opt-2.pyc +0 -0
  346. sage/middleware/utils/embedding/__pycache__/__init__.cpython-313.pyc +0 -0
  347. sage/middleware/utils/embedding/__pycache__/_cohere.cpython-313.opt-2.pyc +0 -0
  348. sage/middleware/utils/embedding/__pycache__/_cohere.cpython-313.pyc +0 -0
  349. sage/middleware/utils/embedding/__pycache__/bedrock.cpython-313.opt-2.pyc +0 -0
  350. sage/middleware/utils/embedding/__pycache__/bedrock.cpython-313.pyc +0 -0
  351. sage/middleware/utils/embedding/__pycache__/embedding_api.cpython-313.opt-2.pyc +0 -0
  352. sage/middleware/utils/embedding/__pycache__/embedding_api.cpython-313.pyc +0 -0
  353. sage/middleware/utils/embedding/__pycache__/embedding_model.cpython-313.opt-2.pyc +0 -0
  354. sage/middleware/utils/embedding/__pycache__/embedding_model.cpython-313.pyc +0 -0
  355. sage/middleware/utils/embedding/__pycache__/hf.cpython-313.opt-2.pyc +0 -0
  356. sage/middleware/utils/embedding/__pycache__/hf.cpython-313.pyc +0 -0
  357. sage/middleware/utils/embedding/__pycache__/instructor.cpython-313.opt-2.pyc +0 -0
  358. sage/middleware/utils/embedding/__pycache__/instructor.cpython-313.pyc +0 -0
  359. sage/middleware/utils/embedding/__pycache__/jina.cpython-313.opt-2.pyc +0 -0
  360. sage/middleware/utils/embedding/__pycache__/jina.cpython-313.pyc +0 -0
  361. sage/middleware/utils/embedding/__pycache__/lollms.cpython-313.opt-2.pyc +0 -0
  362. sage/middleware/utils/embedding/__pycache__/lollms.cpython-313.pyc +0 -0
  363. sage/middleware/utils/embedding/__pycache__/mockembedder.cpython-313.opt-2.pyc +0 -0
  364. sage/middleware/utils/embedding/__pycache__/mockembedder.cpython-313.pyc +0 -0
  365. sage/middleware/utils/embedding/__pycache__/nvidia_openai.cpython-313.opt-2.pyc +0 -0
  366. sage/middleware/utils/embedding/__pycache__/nvidia_openai.cpython-313.pyc +0 -0
  367. sage/middleware/utils/embedding/__pycache__/ollama.cpython-313.opt-2.pyc +0 -0
  368. sage/middleware/utils/embedding/__pycache__/ollama.cpython-313.pyc +0 -0
  369. sage/middleware/utils/embedding/__pycache__/openai.cpython-313.opt-2.pyc +0 -0
  370. sage/middleware/utils/embedding/__pycache__/openai.cpython-313.pyc +0 -0
  371. sage/middleware/utils/embedding/__pycache__/siliconcloud.cpython-313.opt-2.pyc +0 -0
  372. sage/middleware/utils/embedding/__pycache__/siliconcloud.cpython-313.pyc +0 -0
  373. sage/middleware/utils/embedding/__pycache__/zhipu.cpython-313.opt-2.pyc +0 -0
  374. sage/middleware/utils/embedding/__pycache__/zhipu.cpython-313.pyc +0 -0
  375. {isage_middleware-0.1.1.dist-info → isage_middleware-0.1.3.dist-info}/WHEEL +0 -0
  376. {isage_middleware-0.1.1.dist-info → isage_middleware-0.1.3.dist-info}/top_level.txt +0 -0
  377. /sage/middleware/{enterprise → components}/sage_db/python/sage_db.py +0 -0
  378. /sage/middleware/{enterprise → components}/sage_db/sage_db.py +0 -0
  379. /sage/middleware/{enterprise → components}/sage_db/tests/test_python.py +0 -0
@@ -0,0 +1,700 @@
1
+ import os
2
+ import json
3
+ import faiss
4
+ import pickle
5
+ import numpy as np
6
+ from typing import Optional, List, Dict, Any
7
+ from sage.common.utils.logging.custom_logger import CustomLogger
8
+ from sage.middleware.components.neuromem.search_engine.vdb_index.base_vdb_index import BaseVDBIndex
9
+
10
+
11
+ class FaissIndex(BaseVDBIndex):
12
def __init__(
    self,
    config: Optional[dict]
):
    """
    Create an empty FAISS-backed vector index from a config dict.

    An instance can be obtained in two ways:
      1. Direct creation: pass ``config`` to this constructor.
      2. Loading from disk: call ``FaissIndex.load()``, which bypasses
         this constructor and restores the attributes itself.

    Config keys read here:
      - "name" (required): index name.
      - "dim" (default 128): vector dimensionality.
      - "tombstone_threshold" (default 30): tombstone count checked by
        ``_rebuild_index_if_needed``.
    Further keys (e.g. "index_type") are read by ``_init_index``.

    Raises:
        ValueError: if ``config`` is None or does not contain "name".
    """
    super().__init__()
    self.logger = CustomLogger()
    # A missing config is treated like an empty one so the caller gets the
    # explicit "name is required" error below instead of an AttributeError.
    self.config = config if config is not None else {}

    # "name" is mandatory; everything else falls back to a default.
    self.index_name = self.config.get("name", None)
    if self.index_name is None:
        self.logger.error("索引名称(name)未在config中指定,无法创建索引")
        raise ValueError("索引名称(name)未在config中指定")

    self.dim = self.config.get("dim", 128)
    self.id_map: Dict[int, str] = {}    # FAISS int id -> caller string id
    self.rev_map: Dict[str, int] = {}   # caller string id -> FAISS int id
    self.next_id: int = 1
    self.tombstones: set[str] = set()   # string ids deleted logically only
    self.tombstone_threshold = self.config.get("tombstone_threshold", 30)
    # Duplicate detection: vector hash -> string id that owns the vector.
    self.vector_hashes: Dict[str, str] = {}

    self.index, self._deletion_supported = self._init_index()

    # add_with_ids() needs an ID-mapping wrapper around the raw index.
    if not isinstance(self.index, faiss.IndexIDMap):
        self.logger.info("Wrapping index with IndexIDMap")
        self.index = faiss.IndexIDMap(self.index)
50
+
51
def _init_index(self, index_type: Optional[str] = None):
    """
    Build the raw FAISS index described by ``self.config``.

    Args:
        index_type: FAISS index class name to build. When omitted, the
            "index_type" config entry is used (default "IndexFlatL2").
            The parameter lets the wrapper types (IndexRefineFlat,
            IndexIDMap) build their base index without temporarily
            rewriting ``self.config``.

    Returns:
        tuple: ``(index, deletion_supported)`` where the flag is what
        ``delete``/``update`` consult before calling ``remove_ids``.

    Raises:
        ValueError: for an unknown index type, or when a wrapper type is
            configured with another wrapper type as its base (which would
            otherwise recurse without end).
    """
    config = self.config
    if index_type is None:
        index_type = config.get("index_type", "IndexFlatL2")

    # Flat (exact) indexes
    if index_type == "IndexFlatL2":
        return faiss.IndexFlatL2(self.dim), True

    elif index_type == "IndexFlatIP":
        return faiss.IndexFlatIP(self.dim), True

    # HNSW
    elif index_type == "IndexHNSWFlat":
        hnsw_m = int(config.get("HNSW_M", 32))
        ef_construction = int(config.get("HNSW_EF_CONSTRUCTION", 200))
        index = faiss.IndexHNSWFlat(self.dim, hnsw_m)
        index.hnsw.efConstruction = ef_construction
        if "HNSW_EF_SEARCH" in config:
            index.hnsw.efSearch = int(config["HNSW_EF_SEARCH"])
        return index, False

    # IVF Flat
    elif index_type == "IndexIVFFlat":
        nlist = int(config.get("IVF_NLIST", 100))
        nprobe = int(config.get("IVF_NPROBE", 10))
        metric = self._get_metric(config.get("IVF_METRIC", "L2"))
        quantizer = faiss.IndexFlatL2(self.dim)
        index = faiss.IndexIVFFlat(quantizer, self.dim, nlist, metric)
        index.nprobe = nprobe
        return index, True

    # IVF PQ
    elif index_type == "IndexIVFPQ":
        nlist = int(config.get("IVF_NLIST", 100))
        nprobe = int(config.get("IVF_NPROBE", 10))
        m = int(config.get("PQ_M", 8))
        nbits = int(config.get("PQ_NBITS", 8))
        metric = self._get_metric(config.get("IVF_METRIC", "L2"))
        quantizer = faiss.IndexFlatL2(self.dim)
        index = faiss.IndexIVFPQ(quantizer, self.dim, nlist, m, nbits, metric)
        index.nprobe = nprobe
        return index, True

    # IVF ScalarQuantizer
    elif index_type == "IndexIVFScalarQuantizer":
        nlist = int(config.get("IVF_NLIST", 100))
        nprobe = int(config.get("IVF_NPROBE", 10))
        qtype_str = config.get("SQ_TYPE", "QT_8bit")
        qtype = getattr(faiss.ScalarQuantizer, qtype_str)
        metric = self._get_metric(config.get("IVF_METRIC", "L2"))
        quantizer = faiss.IndexFlatL2(self.dim)
        index = faiss.IndexIVFScalarQuantizer(quantizer, self.dim, nlist, qtype, metric)
        index.nprobe = nprobe
        return index, True

    # LSH
    elif index_type == "IndexLSH":
        nbits = int(config.get("LSH_NBITS", 512))
        rotate_data = bool(config.get("LSH_ROTATE_DATA", True))
        train_thresholds = bool(config.get("LSH_TRAIN_THRESHOLDS", False))
        index = faiss.IndexLSH(self.dim, nbits, rotate_data, train_thresholds)
        return index, False

    # PQ
    elif index_type == "IndexPQ":
        m = int(config.get("PQ_M", 8))
        nbits = int(config.get("PQ_NBITS", 8))
        metric = self._get_metric(config.get("PQ_METRIC", "L2"))
        return faiss.IndexPQ(self.dim, m, nbits, metric), False

    # ScalarQuantizer
    elif index_type == "IndexScalarQuantizer":
        qtype_str = config.get("SQ_TYPE", "QT_8bit")
        qtype = getattr(faiss.ScalarQuantizer, qtype_str)
        metric = self._get_metric(config.get("SQ_METRIC", "L2"))
        return faiss.IndexScalarQuantizer(self.dim, qtype, metric), True

    # Wrapper types: build the base index first, then wrap it.
    elif index_type in ("IndexRefineFlat", "IndexIDMap"):
        base_type = config.get("FAISS_BASE_INDEX_TYPE", "IndexFlatL2")
        if base_type in ("IndexRefineFlat", "IndexIDMap"):
            # The base would read the same FAISS_BASE_INDEX_TYPE entry
            # again and recurse forever.
            raise ValueError(
                f"Unsupported FAISS base index type for {index_type}: {base_type}"
            )
        base_index, base_deletion_supported = self._init_index(base_type)

        if index_type == "IndexIDMap":
            return faiss.IndexIDMap(base_index), base_deletion_supported

        # The second constructor argument of IndexRefineFlat is a buffer
        # of vectors, not the refinement factor, so k_factor is set as an
        # attribute after construction.
        refine_index = faiss.IndexRefineFlat(base_index)
        refine_index.k_factor = float(config.get("REFINE_K_FACTOR", 1.0))
        return refine_index, True

    else:
        raise ValueError(f"Unsupported FAISS index type: {index_type}")
152
+
153
+ def _init_base_index(self):
154
+ """
155
+ 用于 IndexIDMap / IndexRefineFlat 的基础索引初始化
156
+ Initialize base index for IndexIDMap or IndexRefineFlat
157
+ """
158
+ base_type = os.getenv("FAISS_BASE_INDEX_TYPE", "IndexFlatL2")
159
+
160
+ original_type = os.getenv("FAISS_INDEX_TYPE")
161
+ os.environ["FAISS_INDEX_TYPE"] = base_type
162
+ index = self._init_index()
163
+ if original_type:
164
+ os.environ["FAISS_INDEX_TYPE"] = original_type
165
+ return index
166
+
167
+ def _get_vector_hash(self, vector: np.ndarray) -> str:
168
+ """
169
+ 计算向量的哈希值用于检测重复
170
+ Calculate vector hash for duplicate detection
171
+ """
172
+ return str(hash(vector.tobytes()))
173
+
174
+ def _rebuild_index_if_needed(self):
175
+ """
176
+ 当墓碑数量达到阈值时重建索引
177
+ Rebuild index when tombstone count reaches threshold
178
+ """
179
+ if len(self.tombstones) < self.tombstone_threshold:
180
+ return
181
+
182
+ self.logger.warning(f"墓碑数量({len(self.tombstones)})达到阈值({self.tombstone_threshold}),开始重建索引")
183
+
184
+ # 由于FAISS不提供直接获取向量的方法,这里只是清空墓碑
185
+ # 在实际生产环境中,可能需要维护向量的副本或使用其他策略
186
+ self.tombstones.clear()
187
+
188
+ # 清理向量哈希中已删除的条目
189
+ valid_hashes = {}
190
+ for vector_hash, string_id in self.vector_hashes.items():
191
+ if string_id in self.rev_map and string_id not in self.tombstones:
192
+ valid_hashes[vector_hash] = string_id
193
+ self.vector_hashes = valid_hashes
194
+
195
+ self.logger.info("索引重建完成,墓碑已清零")
196
+
197
def _get_metric(self, metric_str):
    """
    Map a metric name from the config to the FAISS metric constant.

    Exactly "L2" selects ``faiss.METRIC_L2``; every other value selects
    ``faiss.METRIC_INNER_PRODUCT``.
    """
    if metric_str == "L2":
        return faiss.METRIC_L2
    return faiss.METRIC_INNER_PRODUCT
203
+
204
def _build_index(self, vectors: List[np.ndarray], ids: List[str]):
    """
    Add an initial batch of vectors and register their string IDs.

    Each string ID is mapped to an int ID (an existing mapping is reused,
    otherwise ``next_id`` is consumed), then all vectors are added to the
    index under those int IDs in one call.

    Args:
        vectors: vectors to stack into a float32 matrix.
        ids: string IDs, one per vector, in the same order.
    """
    matrix = np.vstack(vectors).astype("float32")

    numeric_ids = []
    for sid in ids:
        try:
            numeric_id = self.rev_map[sid]
        except KeyError:
            # First time this string ID is seen: allocate a fresh int ID.
            numeric_id = self.next_id
            self.next_id += 1
            self.rev_map[sid] = numeric_id
            self.id_map[numeric_id] = sid
        numeric_ids.append(numeric_id)

    id_array = np.array(numeric_ids, dtype=np.int64)

    already_wrapped = isinstance(self.index, faiss.IndexIDMap)
    if not already_wrapped:
        # Wrap only once; add_with_ids needs the ID-mapping layer.
        self.logger.info("Wrapping index with IndexIDMap")
        self.index = faiss.IndexIDMap(self.index)

    self.index.add_with_ids(matrix, id_array)  # type: ignore
227
+
228
def delete(self, string_id: str) -> int:
    """
    Delete an ID, physically when possible, otherwise with a tombstone.

    When the index supports removal, ``remove_ids`` is attempted and, on
    success, the ID mappings are dropped. If removal is unsupported or
    raises, the ID is added to ``self.tombstones`` instead and
    ``_rebuild_index_if_needed`` is given a chance to run.

    On both paths every vector hash owned by the ID is forgotten, so the
    same vector can be inserted again later instead of being rejected as
    a duplicate of a deleted entry.

    Args:
        string_id: the ID to delete.

    Returns:
        1 if the ID was deleted (physically or by tombstone),
        0 if the ID is unknown.
    """
    if string_id not in self.rev_map:
        self.logger.warning(f"尝试删除不存在的ID: {string_id}")
        return 0

    int_id = self.rev_map[string_id]

    physically_removed = False
    if self._deletion_supported:
        try:
            id_vector = np.array([int_id], dtype=np.int64)
            self.index.remove_ids(id_vector)  # type: ignore
            physically_removed = True
        except Exception as e:
            # Best effort: fall back to a logical delete below.
            self.logger.warning(f"物理删除失败,转为墓碑标记: {e}")

    # Forget all hashes owned by this ID (collect first, then delete, so
    # the dict is not mutated while it is being iterated).
    stale_hashes = [vh for vh, sid in self.vector_hashes.items() if sid == string_id]
    for vh in stale_hashes:
        del self.vector_hashes[vh]

    if physically_removed:
        del self.rev_map[string_id]
        self.id_map.pop(int_id, None)
        self.logger.info(f"成功删除ID: {string_id}")
        return 1

    self.tombstones.add(string_id)
    # Too many tombstones trigger a bookkeeping cleanup.
    self._rebuild_index_if_needed()
    self.logger.info(f"ID {string_id} 已标记为墓碑")
    return 1
270
+
271
def update(self, string_id: str, new_vector: np.ndarray) -> int:
    """
    Replace the vector stored for an ID (insert it if the ID is unknown).

    * Unknown ID: delegates to ``insert``.
    * New vector already owned by a different ID: rejected.
    * Index supports removal: the old vector is removed and the new one
      is added under the same int ID.
    * Index does not support removal: the old int ID is dropped from the
      mappings (its vector stays in the index but no longer resolves to
      any string ID) and the new vector is added under a fresh int ID.

    After a successful update the ID's hash entry is replaced and the ID
    is removed from the tombstone set, so the updated vector is visible
    to ``search`` just as it would be after updating a physically
    deleted ID (which goes through ``insert``).

    Args:
        string_id: ID whose vector is replaced.
        new_vector: the replacement vector.

    Returns:
        1 on success, 0 on failure (duplicate vector or index error).
    """
    if string_id not in self.rev_map:
        # Unknown ID: behave like an insert.
        self.logger.info(f"ID {string_id} 不存在,将执行插入操作")
        return self.insert(new_vector, string_id)

    int_id = self.rev_map[string_id]

    # Reject the update if another ID already owns this exact vector.
    new_vector_hash = self._get_vector_hash(new_vector)
    if new_vector_hash in self.vector_hashes and self.vector_hashes[new_vector_hash] != string_id:
        self.logger.warning(f"更新失败: 向量与已存在的ID {self.vector_hashes[new_vector_hash]} 重复")
        return 0

    if self._deletion_supported:
        try:
            # Remove the old vector and add the new one under the same ID.
            id_vector = np.array([int_id], dtype=np.int64)
            self.index.remove_ids(id_vector)  # type: ignore
            vector = np.expand_dims(new_vector.astype("float32"), axis=0)
            self.index.add_with_ids(vector, id_vector)  # type: ignore
        except Exception as e:
            self.logger.error(f"更新失败: {e}")
            return 0
    else:
        # The old vector cannot be removed: detach its int ID and store
        # the new vector under a freshly allocated one.
        self.id_map.pop(int_id, None)
        new_int_id = self.next_id
        self.next_id += 1
        self.rev_map[string_id] = new_int_id
        self.id_map[new_int_id] = string_id
        vector = np.expand_dims(new_vector.astype("float32"), axis=0)
        int_id_np = np.array([new_int_id], dtype=np.int64)
        self.index.add_with_ids(vector, int_id_np)  # type: ignore

    # Replace every hash previously owned by this ID with the new one.
    stale_hashes = [vh for vh, sid in self.vector_hashes.items() if sid == string_id]
    for vh in stale_hashes:
        del self.vector_hashes[vh]
    self.vector_hashes[new_vector_hash] = string_id

    # A successfully updated ID is live again.
    self.tombstones.discard(string_id)

    self.logger.info(f"成功更新ID: {string_id}")
    return 1
345
+
346
def search(self, query_vector: np.ndarray, topk: int = 10, threshold: Optional[float] = None):
    """
    Return up to ``topk`` nearest live IDs for a query vector.

    Entries that must not be returned are skipped: FAISS empty slots
    (id -1), int IDs that no longer resolve through ``id_map``, and
    tombstoned string IDs. To still be able to fill ``topk`` results, the
    underlying query asks for ``topk`` plus the number of index entries
    that are not live, capped at ``index.ntotal``.

    Args:
        query_vector: the query vector (cast to float32).
        topk: maximum number of results; values <= 0 yield no results.
        threshold: when given, results whose reported distance is greater
            than this value are dropped.
            NOTE(review): this comparison treats larger values as worse;
            confirm the intended meaning for inner-product indexes.

    Returns:
        tuple: ``(string_ids, distances)`` as two parallel lists, the
        distances converted to Python floats.
    """
    if self.index.ntotal == 0:
        self.logger.warning("索引为空,无法进行检索")
        return [], []

    if topk <= 0:
        # Nothing was asked for; do not hand k <= 0 to FAISS.
        return [], []

    query_vector = np.expand_dims(query_vector.astype("float32"), axis=0)

    # Live = mapped and not tombstoned. Everything else in the index
    # (tombstoned, or detached from id_map) can only waste result slots,
    # so over-fetch by that amount.
    available_count = sum(1 for sid in self.id_map.values() if sid not in self.tombstones)
    hidden_count = max(len(self.tombstones), self.index.ntotal - available_count)
    search_k = min(topk + hidden_count, self.index.ntotal)

    distances, int_ids = self.index.search(query_vector, search_k)  # type: ignore

    results = []
    filtered_distances = []

    for i, dist in zip(int_ids[0], distances[0]):
        if i == -1:  # FAISS marker for an empty result slot
            continue
        string_id = self.id_map.get(int(i))
        if string_id is None or string_id in self.tombstones:
            continue
        if threshold is not None and dist > threshold:
            continue
        results.append(string_id)
        filtered_distances.append(float(dist))  # plain Python float
        if len(results) >= topk:
            break

    # Warn when fewer results came back than live entries exist.
    if len(results) < topk and len(results) < available_count:
        self.logger.warning(f"期望返回{topk}个结果,实际只找到{len(results)}个结果")

    if threshold is not None and len(results) == 0:
        self.logger.warning(f"在阈值{threshold}限制下,未找到任何结果")

    return results, filtered_distances
398
+
399
def insert(self, vector: np.ndarray, string_id: str) -> int:
    """
    Add one vector to the index under the given string ID.

    A vector whose hash is already recorded in ``vector_hashes`` is
    rejected. Otherwise the string ID is resolved to an int ID (an
    existing mapping is reused, else ``next_id`` is consumed), the vector
    is added as a single float32 row, and its hash is recorded.

    Args:
        vector: the vector to add.
        string_id: the caller-facing ID for the vector.

    Returns:
        1 if the vector was added, 0 if it was rejected as a duplicate.
    """
    fingerprint = self._get_vector_hash(vector)
    if fingerprint in self.vector_hashes:
        owner = self.vector_hashes[fingerprint]
        self.logger.warning(f"向量重复: 尝试插入的向量与已存在的ID {owner} 相同")
        return 0

    try:
        numeric_id = self.rev_map[string_id]
    except KeyError:
        # Unknown string ID: allocate the next int ID for it.
        numeric_id = self.next_id
        self.next_id += 1
        self.rev_map[string_id] = numeric_id
        self.id_map[numeric_id] = string_id

    row = np.expand_dims(vector.astype("float32"), axis=0)
    id_array = np.array([numeric_id], dtype=np.int64)
    self.index.add_with_ids(row, id_array)  # type: ignore

    # Remember the vector so later duplicates can be detected.
    self.vector_hashes[fingerprint] = string_id

    self.logger.info(f"成功插入向量,ID: {string_id}")
    return 1
432
+
433
def batch_insert(self, vectors: List[np.ndarray], string_ids: List[str]) -> int:
    """
    Add several vectors and their string IDs in one index call.

    A vector is skipped when its hash is already recorded in
    ``vector_hashes`` or when an identical vector appeared earlier in the
    same batch (the first occurrence wins). Each vector is hashed once.
    The remaining vectors are mapped to int IDs (existing mappings are
    reused, otherwise ``next_id`` is consumed), added together, and their
    hashes are recorded after the add succeeded.

    Args:
        vectors: vectors to add.
        string_ids: string IDs, one per vector, in the same order.

    Returns:
        Number of vectors actually added.

    Raises:
        AssertionError: if the two lists differ in length.
    """
    assert len(vectors) == len(string_ids), "Vectors and IDs must match in length"

    valid_vectors = []
    valid_ids = []
    batch_hashes: Dict[str, str] = {}  # hash -> string id, accepted in this batch

    for vector, string_id in zip(vectors, string_ids):
        vector_hash = self._get_vector_hash(vector)
        if vector_hash in self.vector_hashes:
            existing_id = self.vector_hashes[vector_hash]
        elif vector_hash in batch_hashes:
            existing_id = batch_hashes[vector_hash]
        else:
            batch_hashes[vector_hash] = string_id
            valid_vectors.append(vector)
            valid_ids.append(string_id)
            continue
        self.logger.warning(f"跳过重复向量: ID {string_id} 的向量与已存在的ID {existing_id} 相同")

    if not valid_vectors:
        self.logger.warning("批量插入:所有向量都重复,没有插入任何向量")
        return 0

    np_vectors = np.vstack(valid_vectors).astype("float32")
    int_ids = []

    for string_id in valid_ids:
        if string_id in self.rev_map:
            int_id = self.rev_map[string_id]
        else:
            int_id = self.next_id
            self.next_id += 1
            self.rev_map[string_id] = int_id
            self.id_map[int_id] = string_id
        int_ids.append(int_id)

    int_ids_np = np.array(int_ids, dtype=np.int64)

    # add_with_ids needs the ID-mapping wrapper.
    if not isinstance(self.index, faiss.IndexIDMap):
        self.logger.info("Wrapping index with IndexIDMap")
        self.index = faiss.IndexIDMap(self.index)

    self.index.add_with_ids(np_vectors, int_ids_np)  # type: ignore

    # Record the hashes only after the vectors are in the index.
    self.vector_hashes.update(batch_hashes)

    success_count = len(valid_vectors)
    self.logger.info(f"批量插入完成,成功插入 {success_count} 个向量")
    return success_count
492
+
493
def store(self, dir_path: str) -> Dict[str, Any]:
    """
    Persist the index, its ID bookkeeping and its parameters to a directory.

    Files written into ``dir_path`` (created if missing):
      - ``faiss.index``: the FAISS index itself.
      - ``id_map.pkl``, ``rev_map.pkl``, ``tombstones.pkl``,
        ``vector_hashes.pkl``: pickled bookkeeping containers.
      - ``meta.json``: name, dim, next ID, deletion flag, tombstone
        threshold and the config dict.

    Returns:
        ``{"index_path": dir_path}``
    """
    os.makedirs(dir_path, exist_ok=True)

    # 1. The FAISS index
    faiss.write_index(self.index, os.path.join(dir_path, "faiss.index"))

    # 2. The pickled containers, written in a fixed order
    pickled_attrs = (
        ("id_map", "id_map.pkl"),
        ("rev_map", "rev_map.pkl"),
        ("tombstones", "tombstones.pkl"),
        ("vector_hashes", "vector_hashes.pkl"),
    )
    for attr_name, file_name in pickled_attrs:
        with open(os.path.join(dir_path, file_name), "wb") as handle:
            pickle.dump(getattr(self, attr_name), handle)

    # 3. Scalar parameters and config
    meta = {}
    meta["index_name"] = self.index_name
    meta["dim"] = self.dim
    meta["next_id"] = self.next_id
    meta["deletion_supported"] = self._deletion_supported
    meta["tombstone_threshold"] = self.tombstone_threshold
    meta["config"] = getattr(self, "config", {})  # tolerate a missing config attribute

    meta_file = os.path.join(dir_path, "meta.json")
    with open(meta_file, "w", encoding="utf-8") as handle:
        json.dump(meta, handle, ensure_ascii=False, indent=2)

    return {"index_path": dir_path}
521
+
522
def _load_data(self, dir_path: str):
    """
    Restore the FAISS index and the pickled bookkeeping from a directory.

    Reads ``faiss.index``, ``id_map.pkl``, ``rev_map.pkl`` and
    ``tombstones.pkl``. ``vector_hashes.pkl`` is optional: when the file
    is absent, ``vector_hashes`` starts out empty. Meant to be called
    from the ``load`` classmethod only.

    NOTE: the ``.pkl`` files are read with ``pickle.load``; only load
    directories that come from a trusted source.
    """
    self.index = faiss.read_index(os.path.join(dir_path, "faiss.index"))

    # Mandatory containers
    required = (
        ("id_map", "id_map.pkl"),
        ("rev_map", "rev_map.pkl"),
        ("tombstones", "tombstones.pkl"),
    )
    for attr_name, file_name in required:
        with open(os.path.join(dir_path, file_name), "rb") as handle:
            setattr(self, attr_name, pickle.load(handle))

    # Optional container
    hashes_file = os.path.join(dir_path, "vector_hashes.pkl")
    if not os.path.exists(hashes_file):
        self.vector_hashes = {}
        return
    with open(hashes_file, "rb") as handle:
        self.vector_hashes = pickle.load(handle)
544
+
545
@classmethod
def load(cls, name: str, load_path: str) -> "FaissIndex":
    """
    Restore an index previously written by ``store``.

    Args:
        name: expected index name; compared against the name saved in
            ``meta.json``.
        load_path: directory that ``store`` wrote to.

    Returns:
        A ``FaissIndex`` whose attributes come from ``meta.json`` and
        from the files read by ``_load_data``.

    Raises:
        ValueError: if ``name`` differs from the saved index name.
    """
    with open(os.path.join(load_path, "meta.json"), "r", encoding="utf-8") as handle:
        meta = json.load(handle)

    saved_name = meta["index_name"]
    if name != saved_name:
        raise ValueError(f"索引名称不匹配: 期望 {name}, 实际 {saved_name}")

    # Skip __init__ so no fresh FAISS index is built only to be replaced;
    # the base-class initializer is still run.
    restored = cls.__new__(cls)
    super(FaissIndex, restored).__init__()

    restored.config = meta.get("config", {})
    restored.logger = CustomLogger()
    restored.index_name = saved_name
    restored.dim = meta["dim"]
    restored.next_id = meta["next_id"]
    restored._deletion_supported = meta.get("deletion_supported", True)
    restored.tombstone_threshold = meta.get("tombstone_threshold", 30)
    restored.vector_hashes = {}  # replaced by _load_data

    # Index and ID containers come from the files next to meta.json.
    restored._load_data(load_path)
    return restored
577
+
578
if __name__ == "__main__":
    # Interactive smoke test: builds a small index, exercises search /
    # insert / update / delete / batch_insert, then round-trips the index
    # through store() and load(). Results are printed, not asserted.
    import os
    import shutil
    import numpy as np

    def colored(text, color):
        """Wrap text in an ANSI colour code ("green", "red" or "yellow")."""
        colors = {"green": "\033[92m", "red": "\033[91m", "yellow": "\033[93m", "reset": "\033[0m"}
        return colors.get(color, "") + text + colors["reset"]

    def print_test_case(desc, expected_ids, expected_dists, actual_ids, actual_dists, digits=4):
        """Print expected vs. actual IDs/distances and a pass/fail verdict."""
        ids_pass = list(expected_ids) == list(actual_ids)
        dists_pass = all(abs(e - a) < 10 ** -digits for e, a in zip(expected_dists, actual_dists))
        status = "通过" if ids_pass and dists_pass else "不通过"
        color = "green" if status == "通过" else "red"
        print(f"【{desc}】")
        print(f"预期IDs:{expected_ids}")
        print(f"实际IDs:{actual_ids}")
        print(f"预期距离:{expected_dists}")
        print(f"实际距离:{[round(x, digits) for x in actual_dists]}")
        print(f"测试情况:{colored(status, color)}\n")

    # ==== Base data ====
    dim = 4
    index_name = "test_index"
    root_dir = "./faiss_index_test"
    if os.path.exists(root_dir):
        shutil.rmtree(root_dir)
    os.makedirs(root_dir, exist_ok=True)

    vectors = [
        np.array([1.0, 0.0, 0.0, 0.0]),
        np.array([0.0, 1.0, 0.0, 0.0]),
        np.array([0.0, 0.0, 1.0, 0.0]),
    ]
    ids = ["id1", "id2", "id3"]

    # The constructor only accepts a config, so the initial vectors are
    # loaded with batch_insert afterwards.
    config = {"name": index_name, "dim": dim, "tombstone_threshold": 2}  # small threshold for testing
    index = FaissIndex(config=config)
    index.batch_insert(vectors, ids)

    # 1. Search
    q1 = np.array([1.0, 0.0, 0.0, 0.0])
    r_ids, r_dists = index.search(q1, 3)
    print_test_case("基础检索", ["id1", "id2", "id3"], [0.0, 2.0, 2.0], r_ids, r_dists)

    # 2. Insert a new vector
    result = index.insert(np.array([0.0, 0.0, 0.0, 1.0]), "id4")
    print(f"插入结果: {result} (期望: 1)")
    q2 = np.array([0.0, 0.0, 0.0, 1.0])
    r_ids, r_dists = index.search(q2, 4)
    print_test_case("插入后检索", ["id4", "id1", "id2", "id3"], [0.0, 2.0, 2.0, 2.0], r_ids, r_dists)

    # 3. Inserting a duplicate vector must be rejected
    result = index.insert(np.array([0.0, 0.0, 0.0, 1.0]), "id5")
    print(f"重复向量插入结果: {result} (期望: 0)")

    # 4. Update a vector
    result = index.update("id1", np.array([0.5, 0.5, 0.0, 0.0]))
    print(f"更新结果: {result} (期望: 1)")
    q3 = np.array([0.5, 0.5, 0.0, 0.0])
    r_ids, r_dists = index.search(q3, 4)
    print_test_case("更新后检索", ['id1', 'id2', 'id3', 'id4'], [0.0, 0.5, 1.5, 1.5], r_ids, r_dists)

    # 5. Delete a vector
    result = index.delete("id2")
    print(f"删除结果: {result} (期望: 1)")
    q4 = np.array([1.0, 0.0, 0.0, 0.0])
    r_ids, r_dists = index.search(q4, 4)
    print_test_case("删除后检索", ['id1', 'id3', 'id4'], [0.5, 2.0, 2.0], r_ids, r_dists)

    # 6. Search with a distance threshold
    r_ids, r_dists = index.search(q4, 4, threshold=1.0)
    print_test_case("阈值检索(threshold=1.0)", ['id1'], [0.5], r_ids, r_dists)

    # 7. Batch insert
    count = index.batch_insert([
        np.array([0.1, 0.1, 0.1, 0.1]),
        np.array([0.2, 0.2, 0.2, 0.2]),
    ], ["id5", "id6"])
    print(f"批量插入结果: {count} (期望: 2)")
    q5 = np.array([0.1, 0.1, 0.1, 0.1])
    r_ids, r_dists = index.search(q5, 6)
    print_test_case("批量插入后检索", ['id5', 'id6', 'id1', 'id3', 'id4'], [0.0, 0.04, 0.34, 0.84, 0.84], r_ids[:5], r_dists[:5], 2)

    # 8. More deletions. Tombstones (and the threshold-driven cleanup)
    #    only come into play when a delete cannot be done physically; with
    #    the default index type used here deletes are physical.
    print(colored("\n--- 测试墓碑阈值重建 ---", "yellow"))
    index.delete("id3")  # second delete
    index.delete("id4")  # third delete

    # ==== Persist to disk ====
    print(colored("\n--- 保存索引到磁盘 ---", "yellow"))
    index.store(root_dir)
    print(colored(f"数据已保存到目录: {root_dir}", "yellow"))

    # ==== Drop the in-memory object ====
    del index
    print(colored("内存对象已清除。", "yellow"))

    # ==== Reload and search ====
    user_input = input(colored("输入 yes 加载刚才保存的数据: ", "yellow"))
    if user_input.strip().lower() == "yes":
        index2 = FaissIndex.load(index_name, root_dir)
        print(colored("数据已从磁盘恢复!", "green"))

        # id3 and id4 were deleted before saving, so they must not show
        # up in the results after the reload.
        r_ids, r_dists = index2.search(np.array([0.1, 0.1, 0.1, 0.1]), 5)
        print_test_case("恢复后检索", ["id5", "id6", "id1"], [0.0, 0.04, 0.34], r_ids, r_dists, 2)

        # Show the tombstone state
        print(f"当前墓碑数量: {len(index2.tombstones)}")
        print(f"墓碑内容: {index2.tombstones}")
    else:
        print(colored("跳过加载测试。", "yellow"))

    # ==== Remove the on-disk data ====
    user_input = input(colored("输入 yes 删除磁盘所有数据: ", "yellow"))
    if user_input.strip().lower() == "yes":
        shutil.rmtree(root_dir)
        print(colored("所有数据已删除!", "green"))
    else:
        print(colored("未执行删除。", "yellow"))
699
+
700
+