pyreindexer 0.5.100000__pp38-pypy38_pp73-macosx_15_0_arm64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pyreindexer/.dylibs/libleveldb.1.23.0.dylib +0 -0
- pyreindexer/.dylibs/libomp.dylib +0 -0
- pyreindexer/.dylibs/libsnappy.1.2.2.dylib +0 -0
- pyreindexer/CMakeLists.txt +91 -0
- pyreindexer/__init__.py +5 -0
- pyreindexer/example/main.py +315 -0
- pyreindexer/exceptions.py +10 -0
- pyreindexer/index_definition.py +58 -0
- pyreindexer/index_search_params.py +68 -0
- pyreindexer/lib/include/pyobjtools.cc +245 -0
- pyreindexer/lib/include/pyobjtools.h +39 -0
- pyreindexer/lib/include/query_wrapper.cc +376 -0
- pyreindexer/lib/include/query_wrapper.h +98 -0
- pyreindexer/lib/include/queryresults_wrapper.h +97 -0
- pyreindexer/lib/include/transaction_wrapper.h +70 -0
- pyreindexer/lib/src/rawpyreindexer.cc +1389 -0
- pyreindexer/lib/src/rawpyreindexer.h +208 -0
- pyreindexer/lib/src/reindexerinterface.cc +240 -0
- pyreindexer/lib/src/reindexerinterface.h +84 -0
- pyreindexer/point.py +19 -0
- pyreindexer/query.py +1264 -0
- pyreindexer/query_results.py +147 -0
- pyreindexer/raiser_mixin.py +89 -0
- pyreindexer/rx_connector.py +576 -0
- pyreindexer/tests/__init__.py +0 -0
- pyreindexer/tests/conftest.py +239 -0
- pyreindexer/tests/embedding_tests/__init__.py +0 -0
- pyreindexer/tests/embedding_tests/test_query.py +135 -0
- pyreindexer/tests/helpers/__init__.py +0 -0
- pyreindexer/tests/helpers/api.py +213 -0
- pyreindexer/tests/helpers/base_helper.py +42 -0
- pyreindexer/tests/helpers/check_helper.py +18 -0
- pyreindexer/tests/helpers/log_helper.py +54 -0
- pyreindexer/tests/helpers/matchers.py +80 -0
- pyreindexer/tests/helpers/server_helper.py +86 -0
- pyreindexer/tests/helpers/transaction.py +34 -0
- pyreindexer/tests/test_data/__init__.py +0 -0
- pyreindexer/tests/test_data/auth.py +28 -0
- pyreindexer/tests/test_data/constants.py +115 -0
- pyreindexer/tests/tests/__init__.py +0 -0
- pyreindexer/tests/tests/test_auth.py +199 -0
- pyreindexer/tests/tests/test_builtin.py +45 -0
- pyreindexer/tests/tests/test_cproto.py +92 -0
- pyreindexer/tests/tests/test_database.py +19 -0
- pyreindexer/tests/tests/test_index.py +104 -0
- pyreindexer/tests/tests/test_items.py +126 -0
- pyreindexer/tests/tests/test_metadata.py +80 -0
- pyreindexer/tests/tests/test_namespace.py +37 -0
- pyreindexer/tests/tests/test_query.py +1035 -0
- pyreindexer/tests/tests/test_sql.py +152 -0
- pyreindexer/tests/tests/test_transaction.py +249 -0
- pyreindexer/transaction.py +216 -0
- pyreindexer-0.5.100000.dist-info/LICENSE +202 -0
- pyreindexer-0.5.100000.dist-info/METADATA +2202 -0
- pyreindexer-0.5.100000.dist-info/RECORD +59 -0
- pyreindexer-0.5.100000.dist-info/WHEEL +5 -0
- pyreindexer-0.5.100000.dist-info/top_level.txt +2 -0
- rawpyreindexerb.so +0 -0
- rawpyreindexerc.so +0 -0
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
@@ -0,0 +1,91 @@
|
|
|
1
|
+
cmake_minimum_required(VERSION 3.18)

project(pyreindexer)

set(CMAKE_CXX_STANDARD 20)
set(CMAKE_CXX_STANDARD_REQUIRED ON)
# Fall back to RelWithDebInfo when the caller did not choose a build type.
if(NOT CMAKE_BUILD_TYPE)
  set(CMAKE_BUILD_TYPE "RelWithDebInfo")
endif()

enable_testing()

# CIBUILDWHEEL can be enabled explicitly or through the CIBUILDWHEEL=1 environment variable.
option(CIBUILDWHEEL "Is cibuildwheel environment" OFF)
if(DEFINED ENV{CIBUILDWHEEL} AND "$ENV{CIBUILDWHEEL}" STREQUAL "1")
  set(CIBUILDWHEEL ON)
endif()

# Under cibuildwheel only the Python headers are collected (no find_package, no linking);
# otherwise the Python3 Development component is required.
set(PY_MIN_VERSION 3.8)
if(CIBUILDWHEEL AND APPLE)
  file(GLOB PYTHON_INCLUDE_DIRS "/Library/Frameworks/Python.framework/Versions/*/include/python*")
  include_directories(SYSTEM ${PYTHON_INCLUDE_DIRS})
elseif(CIBUILDWHEEL)
  file(GLOB PYTHON_INCLUDE_DIRS "/opt/python/*/include/python*")
  include_directories(SYSTEM ${PYTHON_INCLUDE_DIRS})
else()
  find_package(Python3 ${PY_MIN_VERSION} REQUIRED COMPONENTS Development)
endif()

set(RX_MIN_VERSION 5.7.0)
find_package(reindexer CONFIG ${RX_MIN_VERSION} REQUIRED)

option(WITH_GCOV "Enable instrumented code coverage build" OFF)

set(LIB_BUILTIN_NAME "rawpyreindexerb")
set(LIB_CPROTO_NAME "rawpyreindexerc")
set(LIBS_EXT ".so")

set(LIBSRC_DIR ${CMAKE_CURRENT_SOURCE_DIR}/lib/src)
set(RESOURCES_DIR ${CMAKE_CURRENT_SOURCE_DIR}/lib/include)

set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -Wall -Wextra -Werror -Wswitch-enum")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++20 -Wall -Wextra -Werror -Wswitch-enum -Wno-unused-parameter -fexceptions")
string(REPLACE "-O2" "-O3" CMAKE_CXX_FLAGS_RELWITHDEBINFO "${CMAKE_CXX_FLAGS_RELWITHDEBINFO}")

if(CIBUILDWHEEL AND NOT APPLE)
  set(CMAKE_C_FLAGS_RELEASE "${CMAKE_C_FLAGS_RELEASE} -s")
  set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -s")
endif()

file(GLOB_RECURSE SRCS ${RESOURCES_DIR}/*.cc ${LIBSRC_DIR}/*.cc)

include_directories(
  ${LIBSRC_DIR}
  ${RESOURCES_DIR}
  ${Python3_INCLUDE_DIRS}
  ${REINDEXER_INCLUDE_DIRS})

link_directories(
  ${REINDEXER_LIB_DIR}
  ${REINDEXER_LINK_DIRS})

# Both extension modules are built from the same sources and configured identically;
# the cproto variant only gets an extra compile definition (see below).
foreach(rx_module IN ITEMS ${LIB_BUILTIN_NAME} ${LIB_CPROTO_NAME})
  add_library(${rx_module} SHARED ${SRCS})

  # Explicit PRIVATE keyword: nothing in this project links against the modules.
  target_link_libraries(${rx_module} PRIVATE ${Python3_LIBRARIES} ${REINDEXER_LIBRARIES})

  # python does not allow 'lib' prefix
  set_target_properties(${rx_module} PROPERTIES
    SUFFIX "${LIBS_EXT}"
    PREFIX "")

  # We do not want to strictly link to python libs,
  # because we want to build wheels for all python versions using single machine / image / job.
  # When using linking there are unpleasant side effects, for example the wrong version of python is found or
  # python is searched in the wrong place and is not found at all.
  # But we need Python.h during compilation and without this flag we will get "Undefined symbols" at the linking stage.
  # It's not the problem to skip this error, since the user will have his own python.
  if(CIBUILDWHEEL AND APPLE)
    set_target_properties(${rx_module} PROPERTIES LINK_FLAGS "-undefined dynamic_lookup")
  endif()

  if(WITH_GCOV)
    target_link_libraries(${rx_module} PRIVATE -fprofile-arcs -ftest-coverage)
  endif()
endforeach()

target_compile_definitions(${LIB_CPROTO_NAME} PRIVATE PYREINDEXER_CPROTO=1)
|
pyreindexer/__init__.py
ADDED
|
@@ -0,0 +1,315 @@
|
|
|
1
|
+
import random
|
|
2
|
+
|
|
3
|
+
from datetime import timedelta
|
|
4
|
+
from typing import Final, List
|
|
5
|
+
|
|
6
|
+
from pyreindexer import RxConnector
|
|
7
|
+
from pyreindexer.index_search_params import IndexSearchParamHnsw
|
|
8
|
+
from pyreindexer.exceptions import ApiError
|
|
9
|
+
from pyreindexer.query import CondType
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
def create_index_example(db, namespace):
    """Add the primary-key index `id` to `namespace`.

    If the first `index_add` raises `ApiError`, the `id` index is dropped
    and the add is attempted once more.
    """
    id_index = dict(
        name='id',
        json_paths=['id'],
        field_type='int',
        index_type='hash',
        is_pk=True,
        is_array=False,
        is_dense=False,
        is_sparse=False,
        is_no_column=False,
        collate_mode='none',
        sort_order_letters='',
        expire_after=0,
        config={},
    )

    try:
        db.index_add(namespace, id_index)
    except ApiError:
        # Drop the index and add it again.
        db.index_drop(namespace, 'id', timedelta(milliseconds=1000))
        db.index_add(namespace, id_index)
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
def update_index_example(db, namespace):
    """Update the `id` index of `namespace` to a dense `int64` hash index."""
    modified = dict(
        name='id',
        json_paths=['id'],
        field_type='int64',
        index_type='hash',
        is_pk=True,
        is_array=False,
        is_dense=True,
        is_sparse=False,
        is_no_column=False,
        collate_mode='none',
        sort_order_letters='',
        expire_after=0,
        config={},
    )
    db.index_update(namespace, modified)
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
def create_items_example(db, namespace):
    """Upsert ten items into `namespace`.

    Every item is sent with `id` 1 and the `id=serial()` precept; names
    alternate between `item_0` and `item_1`.
    """
    total = 10

    for idx in range(total):
        record = {'id': 1, 'name': f'item_{idx % 2}', 'value': 'check'}
        db.item_upsert(namespace, record, ["id=serial()"])
|
|
61
|
+
|
|
62
|
+
|
|
63
|
+
def select_item_query_example(db, namespace):
    """Run an SQL select for items named `item_0` and return the result of `db.exec_sql`.

    The query is executed with a one-second timeout.
    """
    item_name_for_lookup = 'item_0'
    sql = f"SELECT * FROM {namespace} WHERE name='{item_name_for_lookup}'"
    timeout = timedelta(milliseconds=1000)

    return db.exec_sql(sql, timeout)
|
|
67
|
+
|
|
68
|
+
def select_all_item_query_example(db, namespace):
    """Select every item of `namespace` via SQL (one-second timeout) and return the result."""
    sql = f'SELECT * FROM {namespace}'
    timeout = timedelta(milliseconds=1000)
    return db.exec_sql(sql, timeout)
|
|
70
|
+
|
|
71
|
+
def print_all_records_from_namespace(db, namespace, message):
    """Print `message` with the item count of `namespace`, then each item."""
    results = select_all_item_query_example(db, namespace)

    print(message, results.count())

    for record in results:
        print(f'item: {record}')
|
|
79
|
+
|
|
80
|
+
def transaction_example(db, namespace, items_in_base):
    """Delete the first half of `items_in_base` and update the last item in one transaction.

    The last item gets its `value` field overwritten (the dict passed by the
    caller is modified in place). After the commit, the affected count and the
    namespace contents are printed.
    """
    # start transaction
    tx = db.new_transaction(namespace)

    # delete first few items
    half = len(items_in_base) // 2
    for position in range(half):
        tx.delete(items_in_base[position])

    # update last one item, overwrite field 'value'
    last_item = items_in_base[-1]
    last_item['value'] = 'transaction was here'
    tx.update(last_item)

    # stop transaction and commit changes to namespace
    count = tx.commit_with_count(timedelta(milliseconds=1000))
    print(f'Transaction updated count: {count}')

    print_all_records_from_namespace(db, namespace, 'Transaction results count: ')
|
|
100
|
+
|
|
101
|
+
def query_example(db, namespace):
    """Show the query builder: select all, select with a limit, delete, and select again.

    Each step prints its result count; the select steps also print every item.
    """
    one_second = timedelta(milliseconds=1000)

    # query all items
    all_query = db.new_query(namespace).where('value', CondType.CondAny)
    any_items = all_query.sort('id').execute()
    print(f'Query results count (Any): {any_items.count()}')
    for item in any_items:
        print(f'item: {item}')

    # query some items
    limited_query = db.new_query(namespace).where('value', CondType.CondEq, 'check')
    limited_query = limited_query.sort('id').limit(4)
    selected_items = limited_query.execute(one_second)
    print(f'Query results count (limited): {selected_items.count()}')
    for item in selected_items:
        print(f'item: {item}')

    # delete some items
    delete_query = db.new_query(namespace).where('name', CondType.CondEq, 'item_1')
    del_count = delete_query.delete(one_second)
    print(f'Deleted count: {del_count}')

    # query all actual items
    remaining_query = db.new_query(namespace).where('value', CondType.CondAny)
    any_items = remaining_query.must_execute()
    print(f'Query results count (Any after delete): {any_items.count()}')
    for item in any_items:
        print(f'item: {item}')
|
|
134
|
+
|
|
135
|
+
def modify_query_transaction(db, namespace):
    """Attach an update query and a delete query to one transaction and commit it.

    Items with `id` <= 5 get a new `name`; items with `id` >= 6 are deleted.
    The namespace contents are printed afterwards.
    """
    # start transaction
    tx = db.new_transaction(namespace)

    # create an update query and set it for the transaction
    update_filter = db.new_query(namespace).where("id", CondType.CondLe, 5)
    tx.update_query(update_filter.set("name", ["update_with_query_tx"]))

    # create a delete query and set it for the transaction
    tx.delete_query(db.new_query(namespace).where("id", CondType.CondGe, 6))

    # stop transaction and commit changes to namespace
    tx.commit(timedelta(milliseconds=1000))

    print_all_records_from_namespace(db, namespace, 'Transaction with Query results count: ')
|
|
151
|
+
|
|
152
|
+
def random_vector(dimension: int) -> List[float]:
    """Return `dimension` random floats, each drawn with `random.uniform(-10.0, 10.0)`."""
    components = []
    for _ in range(dimension):
        components.append(random.uniform(-10.0, 10.0))
    return components
|
|
154
|
+
|
|
155
|
+
def float_vector_hnsw_example(db):
    """Demonstrate a KNN query over an HNSW float-vector index.

    Opens namespace `knn_hnsw`, adds a primary-key index and an HNSW index,
    inserts 100 random vectors in a transaction, runs `where_knn` sorted by
    rank, prints the results and finally drops the HNSW index.
    """
    namespace = 'knn_hnsw'
    db.namespace_open(namespace)

    # create index
    fv_index_name = 'hnsw_idx'
    dimension: Final[int] = 4
    pk_index = {
        'name': 'id',
        'json_paths': ['id'],
        'field_type': 'int',
        'index_type': 'hash',
        'is_pk': True,
        'is_array': False,
        'is_dense': False,
        'is_sparse': False,
        'is_no_column': False,
        'collate_mode': 'none',
        'sort_order_letters': '',
        'expire_after': 0,
        'config': {},
    }
    hnsw_index = {
        "name": fv_index_name,
        "json_paths": [fv_index_name],
        "field_type": "float_vector",
        "index_type": "hnsw",
        "config": {
            "dimension": dimension,
            "metric": "inner_product",
            "start_size": 100,
            "m": 16,
            "ef_construction": 200,
            "multithreading": 1,
        },
    }
    for definition in (pk_index, hnsw_index):
        db.index_add(namespace, definition)

    # generate items
    tx = db.new_transaction(namespace)
    for _ in range(100):
        tx.insert({"id": 0, fv_index_name: random_vector(dimension)}, ["id=serial()"])
    tx.commit(timedelta(seconds=3))

    # do query
    param = IndexSearchParamHnsw(k=20, ef=30)
    knn_query = db.new_query(namespace).where_knn(fv_index_name, random_vector(dimension), param)
    knn_query = knn_query.select_fields("vectors()").with_rank()
    knn_query = knn_query.sort(index="rank()", desc=True)
    query_result = knn_query.must_execute(timedelta(seconds=1))

    # result
    print("HNSW where_knn: ", query_result.count())
    for item in query_result:
        print('item vec: ', item, end='\n')

    # drop index
    db.index_drop(namespace, fv_index_name, timedelta(milliseconds=300))
|
|
211
|
+
|
|
212
|
+
def float_vector_brute_force_sql_example(db):
    """Demonstrate an SQL KNN query over a brute-force float-vector index.

    Opens namespace `knn_bf`, adds a primary-key index and a `vec_bf` index,
    updates the vector index definition (dimension, metric, start size),
    inserts 100 random vectors, runs `SELECT ... WHERE KNN(...)`, prints the
    results and finally drops the vector index.
    """
    namespace = 'knn_bf'
    db.namespace_open(namespace)

    # create index
    fv_index_name = 'bf_idx'
    dimension: Final[int] = 4
    pk_index = {
        'name': 'id',
        'json_paths': ['id'],
        'field_type': 'int',
        'index_type': 'hash',
        'is_pk': True,
        'is_array': False,
        'is_dense': False,
        'is_sparse': False,
        'is_no_column': False,
        'collate_mode': 'none',
        'sort_order_letters': '',
        'expire_after': 0,
        'config': {},
    }
    initial_vector_index = {
        "name": fv_index_name,
        "json_paths": [fv_index_name],
        "field_type": "float_vector",
        "index_type": "vec_bf",
        "config": {
            "dimension": 3,
            "metric": "inner_product",
            "start_size": 10000,
        },
    }
    for definition in (pk_index, initial_vector_index):
        db.index_add(namespace, definition)

    # update index
    index_definition_modified = {
        "name": fv_index_name,
        "json_paths": [fv_index_name],
        "field_type": "float_vector",
        "index_type": "vec_bf",
        "config": {
            "dimension": dimension,
            "metric": "l2",
            "start_size": 1000,
        },
    }
    db.index_update(namespace, index_definition_modified)

    # generate items
    tx = db.new_transaction(namespace)
    for item_id in range(100):
        tx.insert({"id": item_id, fv_index_name: random_vector(dimension)})
    tx.commit(timedelta(seconds=3))

    # execute SQL query SELECT KNN
    value = random_vector(dimension)
    k: Final[int] = 27
    query = f'SELECT *, vectors() FROM {namespace} WHERE KNN({fv_index_name}, {value}, k={k})'
    query_result = db.exec_sql(query, timedelta(seconds=1))
    print("Select where KNN: ", query_result.count())
    for item in query_result:
        print('item vec: ', item, end='\n')

    # drop index
    db.index_drop(namespace, fv_index_name, timedelta(milliseconds=300))
|
|
271
|
+
|
|
272
|
+
|
|
273
|
+
def rx_example():
    """Run every example in this file against a builtin database at `/tmp/pyrx`.

    The connector is closed in a `finally` block, so it is released even when
    one of the example steps raises.
    """
    db = RxConnector('builtin:///tmp/pyrx', max_replication_updates_size=10 * 1024 * 1024)
    # Alternative connection over cproto:
    #   RxConnector('cproto://127.0.0.1:6534/pyrx', enable_compression=True, fetch_amount=500)

    try:
        namespace = 'test_table'
        db.namespace_open(namespace)

        create_index_example(db, namespace)
        update_index_example(db, namespace)

        create_items_example(db, namespace)
        print_all_records_from_namespace(db, namespace, 'All items: ')

        selected_items = select_item_query_example(db, namespace)

        res_count = selected_items.count()
        print(f'Results count: {res_count}')

        # disposable QueryResults iterator
        items_copy = []
        for item in selected_items:
            items_copy.append(item)
            print(f'item: {item}')

        # won't be iterated again
        for item in selected_items:
            print(f'item: {item}')

        transaction_example(db, namespace, items_copy)

        query_example(db, namespace)

        modify_query_transaction(db, namespace)

        float_vector_hnsw_example(db)
        float_vector_brute_force_sql_example(db)
    finally:
        db.close()


if __name__ == "__main__":
    rx_example()
|
|
@@ -0,0 +1,58 @@
|
|
|
1
|
+
# TODO NOT IMPLEMENTED YET
|
|
2
|
+
# TODO dynamic setters which return self. e.g.: indexDef.name('test_name').is_pk().is_dense()
|
|
3
|
+
# TODO check types for each attrs of index definition
|
|
4
|
+
# TODO check possible values for attrs field_type, index_type, collate_mode
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
class IndexDefinition(dict):
    """IndexDefinition is a dictionary subclass which allows constructing and managing indexes more efficiently.
    NOT IMPLEMENTED YET. USE FIELDS DESCRIPTION ONLY.

    #### Arguments:
        name (str): An index name.
        json_paths (:obj:`list` of :obj:`str`): A name for mapping a value to a json field.
        field_type (str): A type of field. Possible values are: `int`, `int64`, `double`, `string`, `bool`,
            `composite`, `float_vector`.
        index_type (str): An index type. Possible values are: `hash`, `tree`, `text`, `-`, `hnsw`, `vec_bf`, `ivf`.
        is_pk (bool): True if a field is a primary key.
        is_array (bool): True if an index is an array.
        is_dense (bool): True if an index is dense. Reduces the index size. Saves 8 bytes per unique key value for
            'hash' and 'tree' index types. For '-' index type saves 4-8 bytes per each element. Useful for indexes
            with high selectivity, but for tree and hash indexes with low selectivity can seriously decrease update
            performance.
        is_no_column (bool): True to disable the column subindex. Reduces the index size.
            Allows to save ~(`stored_type_size` * `namespace_items_count`) bytes, where `stored_type_size` is the size
            of the type stored in the index, and `namespace_items_count` is the number of items in the namespace.
            May reduce performance.
        is_sparse (bool): True if a value of an index may be absent.
        collate_mode (str): Sets an order of values by collate mode. Possible values are:
            `none`, `ascii`, `utf8`, `numeric`, `custom`.
        sort_order_letters (str): Order for a sort sequence for a custom collate mode.
        config (dict): A config for a fulltext and float_vector engine.
            [More about `fulltext`](https://github.com/Restream/reindexer/blob/master/fulltext.md) or
            [More about `float_vector`](https://github.com/Restream/reindexer/blob/master/float_vector.md).
    """

    def __getitem__(self, attr):
        """Return the value stored under a known key, or None when that key is not set.

        Raises:
            KeyError: If `attr` is not one of the known attribute names.
        """
        self._raise_if_key_error(attr)
        return super(IndexDefinition, self).get(attr)

    def __setitem__(self, attr, value):
        """Store `value` under a known key.

        Raises:
            KeyError: If `attr` is not one of the known attribute names.
        """
        self._raise_if_key_error(attr)
        # The overridden `update` always raises, so write through the base dict directly.
        super(IndexDefinition, self).__setitem__(attr, value)
        return self

    def update(self, *args, **kwargs):
        """Bulk update is intentionally unsupported; always raises NotImplementedError."""
        raise NotImplementedError(
            'Bulk update is not implemented for IndexDefinition instance')

    @staticmethod
    def _get_known_attrs() -> list:
        """Return the attribute names accepted as keys.

        The return annotation is the plain `list`: a subscripted `list[str]`
        is evaluated when the function is defined and fails on Python 3.8.
        """
        return ['name', 'json_paths', 'field_type', 'index_type', 'is_pk',
                'is_array', 'is_dense', 'is_sparse', 'is_no_column', 'collate_mode', 'sort_order_letters',
                'expire_after', 'config']

    def _raise_if_key_error(self, attr):
        """Raise KeyError when `attr` is not a known attribute name."""
        known_attrs = self._get_known_attrs()
        if attr not in known_attrs:
            raise KeyError("Invalid key '{0}'. Known keys are: '{1}'".format(
                attr, ', '.join(known_attrs)))
|
|
@@ -0,0 +1,68 @@
|
|
|
1
|
+
class IndexSearchParamBruteForce:
    """Index search param for brute force index. Equal to basic parameters

    #### Attributes:
        k (int): Expected size of KNN index results. Should not be less than 1
        radius (float): In addition to the parameter `k`, the query results can also be filtered by a `rank` -
            value using the parameter which is called `radius`. It's named so because, under the `L2`-metric,
            it restricts vectors from query result to a sphere of the specified radius

    """

    def __init__(self, k: int = None, radius: float = None):
        nothing_given = k is None and radius is None
        if nothing_given:
            raise ValueError("Either 'k' or 'radius' needs to be specified")
        if not (k is None or k >= 1):
            raise ValueError("KNN limit 'k' should not be less than 1")
        self.radius = radius
        self.k = k
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
class IndexSearchParamHnsw:
    """Index search param for HNSW index.

    #### Attributes:
        k (int): Expected size of KNN index results. Should not be less than 1
        ef (int): Size of nearest neighbor buffer that will be filled during fetching. Should not be less than 'k';
            a good choice is `ef` ~= 1.5 * `k`
        radius (float): In addition to the parameter `k`, the query results can also be filtered by a `rank` -
            value using the parameter which is called `radius`. It's named so because, under the `L2`-metric,
            it restricts vectors from query result to a sphere of the specified radius

    """

    def __init__(self, ef: int, k: int = None, radius: float = None):
        has_k = k is not None
        if not has_k and radius is None:
            raise ValueError("Either 'k' or 'radius' needs to be specified")
        if has_k and k < 1:
            raise ValueError("KNN limit 'k' should not be less than 1")
        # `ef` is only validated against `k` when `k` is given.
        if has_k and ef < k:
            raise ValueError("'ef' should not be less than 'k'")
        self.ef = ef
        self.k = k
        self.radius = radius
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
class IndexSearchParamIvf:
    """Index search param for IVF index.

    #### Attributes:
        k (int): Expected size of KNN index results. Should not be less than 1
        nprobe (int): Number of centroids that will be scanned in where. Should not be less than 1
        radius (float): In addition to the parameter `k`, the query results can also be filtered by a `rank` -
            value using the parameter which is called `radius`. It's named so because, under the `L2`-metric,
            it restricts vectors from query result to a sphere of the specified radius

    """

    def __init__(self, nprobe: int, k: int = None, radius: float = None):
        nothing_given = k is None and radius is None
        if nothing_given:
            raise ValueError("Either 'k' or 'radius' needs to be specified")
        if not (k is None or k >= 1):
            raise ValueError("KNN limit 'k' should not be less than 1")
        if nprobe < 1:
            raise ValueError("'nprobe' should not be less than 1")
        self.k = k
        self.radius = radius
        self.nprobe = nprobe
|