kannolo 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (65) hide show
  1. kannolo-0.1.0/.gitignore +287 -0
  2. kannolo-0.1.0/Cargo.lock +1489 -0
  3. kannolo-0.1.0/Cargo.toml +49 -0
  4. kannolo-0.1.0/PKG-INFO +156 -0
  5. kannolo-0.1.0/README.md +139 -0
  6. kannolo-0.1.0/build.rs +46 -0
  7. kannolo-0.1.0/build_toml_files_pub/build_index_dense_dragon.toml +23 -0
  8. kannolo-0.1.0/build_toml_files_pub/build_index_dense_sift.toml +23 -0
  9. kannolo-0.1.0/build_toml_files_pub/build_index_dense_star.toml +23 -0
  10. kannolo-0.1.0/build_toml_files_pub/build_index_pq_dragon.toml +28 -0
  11. kannolo-0.1.0/build_toml_files_pub/build_index_sparse_splade.toml +24 -0
  12. kannolo-0.1.0/convert_bin_to_npy_arrays.py +88 -0
  13. kannolo-0.1.0/convert_npy_arrays_to_bin.py +62 -0
  14. kannolo-0.1.0/docs/PythonUsage.md +102 -0
  15. kannolo-0.1.0/docs/ReplicateResults.md +17 -0
  16. kannolo-0.1.0/docs/RustUsage.md +158 -0
  17. kannolo-0.1.0/kannolo.png +0 -0
  18. kannolo-0.1.0/notebooks/kannolo_demo_dense_plain.ipynb +353 -0
  19. kannolo-0.1.0/notebooks/kannolo_demo_sparse.ipynb +279 -0
  20. kannolo-0.1.0/pyproject.toml +32 -0
  21. kannolo-0.1.0/scripts_experiments/build_index.py +316 -0
  22. kannolo-0.1.0/scripts_experiments/run_search.py +412 -0
  23. kannolo-0.1.0/search_toml_files_pub/run_search_dense_dragon.toml +51 -0
  24. kannolo-0.1.0/search_toml_files_pub/run_search_dense_sift.toml +51 -0
  25. kannolo-0.1.0/search_toml_files_pub/run_search_dense_star.toml +51 -0
  26. kannolo-0.1.0/search_toml_files_pub/run_search_pq_dragon.toml +54 -0
  27. kannolo-0.1.0/search_toml_files_pub/run_search_sparse.toml +51 -0
  28. kannolo-0.1.0/src/bin/hnsw_plain_dense_build.rs +80 -0
  29. kannolo-0.1.0/src/bin/hnsw_plain_dense_build_f16.rs +85 -0
  30. kannolo-0.1.0/src/bin/hnsw_plain_dense_run.rs +124 -0
  31. kannolo-0.1.0/src/bin/hnsw_plain_dense_run_f16.rs +129 -0
  32. kannolo-0.1.0/src/bin/hnsw_plain_sparse_build.rs +83 -0
  33. kannolo-0.1.0/src/bin/hnsw_plain_sparse_run.rs +130 -0
  34. kannolo-0.1.0/src/bin/hnsw_pq_build.rs +288 -0
  35. kannolo-0.1.0/src/bin/hnsw_pq_run.rs +264 -0
  36. kannolo-0.1.0/src/clustering/kmeans/mod.rs +307 -0
  37. kannolo-0.1.0/src/datasets/dataset.rs +67 -0
  38. kannolo-0.1.0/src/datasets/dense_dataset.rs +399 -0
  39. kannolo-0.1.0/src/datasets/sparse_dataset.rs +829 -0
  40. kannolo-0.1.0/src/datasets/utils.rs +183 -0
  41. kannolo-0.1.0/src/distances/dot_product.rs +342 -0
  42. kannolo-0.1.0/src/distances/euclidean_distance.rs +236 -0
  43. kannolo-0.1.0/src/distances/simd/distances.rs +1004 -0
  44. kannolo-0.1.0/src/distances/simd/transpose.rs +347 -0
  45. kannolo-0.1.0/src/distances/simd/utils.rs +266 -0
  46. kannolo-0.1.0/src/distances.rs +11 -0
  47. kannolo-0.1.0/src/hnsw/graph_index.rs +494 -0
  48. kannolo-0.1.0/src/hnsw_utils/config_hnsw.rs +417 -0
  49. kannolo-0.1.0/src/hnsw_utils/hnsw_builder.rs +1227 -0
  50. kannolo-0.1.0/src/hnsw_utils/level.rs +195 -0
  51. kannolo-0.1.0/src/hnsw_utils/mod.rs +1192 -0
  52. kannolo-0.1.0/src/hnsw_utils/visited_table.rs +240 -0
  53. kannolo-0.1.0/src/index_serializer/mod.rs +22 -0
  54. kannolo-0.1.0/src/lib.rs +258 -0
  55. kannolo-0.1.0/src/pylib/mod.rs +1236 -0
  56. kannolo-0.1.0/src/quantizers/decoder.rs +45 -0
  57. kannolo-0.1.0/src/quantizers/encoder.rs +261 -0
  58. kannolo-0.1.0/src/quantizers/plain_quantizer.rs +133 -0
  59. kannolo-0.1.0/src/quantizers/pq.rs +585 -0
  60. kannolo-0.1.0/src/quantizers/quantizer.rs +68 -0
  61. kannolo-0.1.0/src/quantizers/sparse_plain_quantizer.rs +107 -0
  62. kannolo-0.1.0/src/quantizers.rs +6 -0
  63. kannolo-0.1.0/src/topk_selectors/topk_heap.rs +845 -0
  64. kannolo-0.1.0/src/topk_selectors.rs +14 -0
  65. kannolo-0.1.0/src/utils/mod.rs +466 -0
@@ -0,0 +1,287 @@
1
+ ### Linux ###
2
+ *~
3
+
4
+ # temporary files which can be created if a process still has a handle open of a deleted file
5
+ .fuse_hidden*
6
+
7
+ # KDE directory preferences
8
+ .directory
9
+
10
+ # Linux trash folder which might appear on any partition or disk
11
+ .Trash-*
12
+
13
+ # .nfs files are created when an open file is removed but is still being accessed
14
+ .nfs*
15
+
16
+ ### macOS ###
17
+ # General
18
+ .DS_Store
19
+ .AppleDouble
20
+ .LSOverride
21
+
22
+ # Icon must end with two \r
23
+ Icon
24
+
25
+
26
+ # Thumbnails
27
+ ._*
28
+
29
+ # Files that might appear in the root of a volume
30
+ .DocumentRevisions-V100
31
+ .fseventsd
32
+ .Spotlight-V100
33
+ .TemporaryItems
34
+ .Trashes
35
+ .VolumeIcon.icns
36
+ .com.apple.timemachine.donotpresent
37
+
38
+ # Directories potentially created on remote AFP share
39
+ .AppleDB
40
+ .AppleDesktop
41
+ Network Trash Folder
42
+ Temporary Items
43
+ .apdisk
44
+
45
+ ### macOS Patch ###
46
+ # iCloud generated files
47
+ *.icloud
48
+
49
+ ### Python ###
50
+ # Byte-compiled / optimized / DLL files
51
+ __pycache__/
52
+ *.py[cod]
53
+ *$py.class
54
+
55
+ # C extensions
56
+ *.so
57
+
58
+ # Distribution / packaging
59
+ .Python
60
+ build/
61
+ develop-eggs/
62
+ dist/
63
+ downloads/
64
+ eggs/
65
+ .eggs/
66
+ lib/
67
+ lib64/
68
+ parts/
69
+ sdist/
70
+ var/
71
+ wheels/
72
+ share/python-wheels/
73
+ *.egg-info/
74
+ .installed.cfg
75
+ *.egg
76
+ MANIFEST
77
+
78
+ # PyInstaller
79
+ # Usually these files are written by a python script from a template
80
+ # before PyInstaller builds the exe, so as to inject date/other infos into it.
81
+ *.manifest
82
+ *.spec
83
+
84
+ # Installer logs
85
+ pip-log.txt
86
+ pip-delete-this-directory.txt
87
+
88
+ # Unit test / coverage reports
89
+ htmlcov/
90
+ .tox/
91
+ .nox/
92
+ .coverage
93
+ .coverage.*
94
+ .cache
95
+ nosetests.xml
96
+ coverage.xml
97
+ *.cover
98
+ *.py,cover
99
+ .hypothesis/
100
+ .pytest_cache/
101
+ cover/
102
+
103
+ # Translations
104
+ *.mo
105
+ *.pot
106
+
107
+ # Django stuff:
108
+ *.log
109
+ local_settings.py
110
+ db.sqlite3
111
+ db.sqlite3-journal
112
+
113
+ # Flask stuff:
114
+ instance/
115
+ .webassets-cache
116
+
117
+ # Scrapy stuff:
118
+ .scrapy
119
+
120
+ # Sphinx documentation
121
+ docs/_build/
122
+
123
+ # PyBuilder
124
+ .pybuilder/
125
+ target/
126
+
127
+ # Jupyter Notebook
128
+ .ipynb_checkpoints
129
+
130
+ # IPython
131
+ profile_default/
132
+ ipython_config.py
133
+
134
+ # pyenv
135
+ # For a library or package, you might want to ignore these files since the code is
136
+ # intended to run in multiple environments; otherwise, check them in:
137
+ # .python-version
138
+
139
+ # pipenv
140
+ # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
141
+ # However, in case of collaboration, if having platform-specific dependencies or dependencies
142
+ # having no cross-platform support, pipenv may install dependencies that don't work, or not
143
+ # install all needed dependencies.
144
+ #Pipfile.lock
145
+
146
+ # poetry
147
+ # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
148
+ # This is especially recommended for binary packages to ensure reproducibility, and is more
149
+ # commonly ignored for libraries.
150
+ # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
151
+ #poetry.lock
152
+
153
+ # pdm
154
+ # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
155
+ #pdm.lock
156
+ # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
157
+ # in version control.
158
+ # https://pdm.fming.dev/#use-with-ide
159
+ .pdm.toml
160
+
161
+ # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
162
+ __pypackages__/
163
+
164
+ # Celery stuff
165
+ celerybeat-schedule
166
+ celerybeat.pid
167
+
168
+ # SageMath parsed files
169
+ *.sage.py
170
+
171
+ # Environments
172
+ .env
173
+ .venv
174
+ env/
175
+ venv/
176
+ ENV/
177
+ env.bak/
178
+ venv.bak/
179
+
180
+ # Spyder project settings
181
+ .spyderproject
182
+ .spyproject
183
+
184
+ # Rope project settings
185
+ .ropeproject
186
+
187
+ # mkdocs documentation
188
+ /site
189
+
190
+ # mypy
191
+ .mypy_cache/
192
+ .dmypy.json
193
+ dmypy.json
194
+
195
+ # Pyre type checker
196
+ .pyre/
197
+
198
+ # pytype static type analyzer
199
+ .pytype/
200
+
201
+ # Cython debug symbols
202
+ cython_debug/
203
+
204
+ # PyCharm
205
+ # JetBrains specific template is maintained in a separate JetBrains.gitignore that can
206
+ # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
207
+ # and can be added to the global gitignore or merged into this file. For a more nuclear
208
+ # option (not recommended) you can uncomment the following to ignore the entire idea folder.
209
+ #.idea/
210
+
211
+ ### Python Patch ###
212
+ # Poetry local configuration file - https://python-poetry.org/docs/configuration/#local-configuration
213
+ poetry.toml
214
+
215
+ # ruff
216
+ .ruff_cache/
217
+
218
+ # LSP config files
219
+ pyrightconfig.json
220
+
221
+ ### Rust ###
222
+ # Generated by Cargo
223
+ # will have compiled files and executables
224
+ debug/
225
+
226
+ # Remove Cargo.lock from gitignore if creating an executable, leave it for libraries
227
+ # More information here https://doc.rust-lang.org/cargo/guide/cargo-toml-vs-cargo-lock.html
228
+ Cargo.lock
229
+
230
+ # These are backup files generated by rustfmt
231
+ **/*.rs.bk
232
+
233
+ # MSVC Windows builds of rustc generate these, which store debugging information
234
+ *.pdb
235
+
236
+ ### Windows ###
237
+ # Windows thumbnail cache files
238
+ Thumbs.db
239
+ Thumbs.db:encryptable
240
+ ehthumbs.db
241
+ ehthumbs_vista.db
242
+
243
+ # Dump file
244
+ *.stackdump
245
+
246
+ # Folder config file
247
+ [Dd]esktop.ini
248
+
249
+ # Recycle Bin used on file shares
250
+ $RECYCLE.BIN/
251
+
252
+ # Windows Installer files
253
+ *.cab
254
+ *.msi
255
+ *.msix
256
+ *.msm
257
+ *.msp
258
+
259
+ # Windows shortcuts
260
+ *.lnk
261
+
262
+ # target
263
+ ./target/
264
+
265
+ # JetBrains
266
+ .idea/
267
+
268
+ # local scripts must be added manually
269
+ *.sh
270
+
271
+ # data for testing directory
272
+ data_for_testing/
273
+
274
+ # benchmark files
275
+ *.txt
276
+ *.csv
277
+ *.tsv
278
+
279
+ # serialized indexes
280
+ /index_hnsw_serialized
281
+
282
+ # experiments data and results
283
+ /experiments
284
+
285
+ # Private toml files
286
+ /build_toml_files
287
+ /search_toml_files