faissx 0.0.2__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,80 @@
1
+ # Dependencies
2
+ node_modules/
3
+ jspm_packages/
4
+ bower_components/
5
+ package-lock.json
6
+ yarn.lock
7
+ yarn-error.log
8
+ .pnp/
9
+ .pnp.js
10
+
11
+ # Python
12
+ __pycache__/
13
+ *.py[cod]
14
+ *$py.class
15
+ .pytest_cache/
16
+ .coverage
17
+ htmlcov/
18
+ .tox/
19
+ .nox/
20
+ .hypothesis/
21
+ *.egg-info/
22
+ .eggs/
23
+ *.egg
24
+ venv/
25
+ env/
26
+ ENV/
27
+ .venv/
28
+ .env/
29
+ .python-version
30
+
31
+ # Jekyll / Documentation
32
+ docs/_site/
33
+ docs/.sass-cache/
34
+ docs/.jekyll-cache/
35
+ docs/.jekyll-metadata
36
+ docs/Gemfile.lock
37
+ .jekyll-cache/
38
+
39
+ # Cache files
40
+ .npm
41
+ .eslintcache
42
+ .stylelintcache
43
+ .node_repl_history
44
+ .yarn-integrity
45
+ .cache/
46
+ .parcel-cache/
47
+ .next/
48
+ .nuxt/
49
+ .vuepress/dist
50
+ .serverless/
51
+ .fusebox/
52
+ .dynamodb/
53
+ .docusaurus
54
+ .umi
55
+ .umi-production
56
+ .umi-test
57
+ .rts2_cache_cjs/
58
+ .rts2_cache_es/
59
+ .rts2_cache_umd/
60
+
61
+ # OS generated files
62
+ .DS_Store
63
+ .DS_Store?
64
+ ._*
65
+ .Spotlight-V100
66
+ .Trashes
67
+ ehthumbs.db
68
+ Thumbs.db
69
+ *.swp
70
+ *.swo
71
+ .idea/
72
+ .vscode/
73
+ *.sublime-workspace
74
+ *.sublime-project
75
+ .project
76
+ .classpath
77
+ .settings/
78
+ *.launch
79
+ *.tmproj
80
+ .history/
faissx-0.0.2/.flake8 ADDED
@@ -0,0 +1,3 @@
1
+ [flake8]
2
+ max-line-length = 100
3
+ exclude = .git,__pycache__,build,dist
@@ -0,0 +1,178 @@
1
+ # Byte-compiled / optimized / DLL files
2
+ __pycache__/
3
+ *.py[cod]
4
+ *$py.class
5
+
6
+ # C extensions
7
+ *.so
8
+
9
+ # Distribution / packaging
10
+ .Python
11
+ build/
12
+ develop-eggs/
13
+ dist/
14
+ downloads/
15
+ eggs/
16
+ .eggs/
17
+ lib/
18
+ lib64/
19
+ parts/
20
+ sdist/
21
+ var/
22
+ wheels/
23
+ share/python-wheels/
24
+ *.egg-info/
25
+ .installed.cfg
26
+ *.egg
27
+ MANIFEST
28
+
29
+ # PyInstaller
30
+ # Usually these files are written by a python script from a template
31
+ # before PyInstaller builds the exe, so as to inject date/other infos into it.
32
+ *.manifest
33
+ *.spec
34
+
35
+ # Installer logs
36
+ pip-log.txt
37
+ pip-delete-this-directory.txt
38
+
39
+ # Unit test / coverage reports
40
+ htmlcov/
41
+ .tox/
42
+ .nox/
43
+ .coverage
44
+ .coverage.*
45
+ .cache
46
+ nosetests.xml
47
+ coverage.xml
48
+ *.cover
49
+ *.py,cover
50
+ .hypothesis/
51
+ .pytest_cache/
52
+ cover/
53
+
54
+ # Translations
55
+ *.mo
56
+ *.pot
57
+
58
+ # Django stuff:
59
+ *.log
60
+ local_settings.py
61
+ db.sqlite3
62
+ db.sqlite3-journal
63
+
64
+ # Flask stuff:
65
+ instance/
66
+ .webassets-cache
67
+
68
+ # Scrapy stuff:
69
+ .scrapy
70
+
71
+ # Sphinx documentation
72
+ docs/_build/
73
+
74
+ # PyBuilder
75
+ .pybuilder/
76
+ target/
77
+
78
+ # Jupyter Notebook
79
+ .ipynb_checkpoints
80
+
81
+ # IPython
82
+ profile_default/
83
+ ipython_config.py
84
+
85
+ # pyenv
86
+ # For a library or package, you might want to ignore these files since the code is
87
+ # intended to run in multiple environments; otherwise, check them in:
88
+ # .python-version
89
+
90
+ # pipenv
91
+ # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
92
+ # However, in case of collaboration, if having platform-specific dependencies or dependencies
93
+ # having no cross-platform support, pipenv may install dependencies that don't work, or not
94
+ # install all needed dependencies.
95
+ #Pipfile.lock
96
+
97
+ # UV
98
+ # Similar to Pipfile.lock, it is generally recommended to include uv.lock in version control.
99
+ # This is especially recommended for binary packages to ensure reproducibility, and is more
100
+ # commonly ignored for libraries.
101
+ #uv.lock
102
+
103
+ # poetry
104
+ # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
105
+ # This is especially recommended for binary packages to ensure reproducibility, and is more
106
+ # commonly ignored for libraries.
107
+ # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
108
+ #poetry.lock
109
+
110
+ # pdm
111
+ # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
112
+ #pdm.lock
113
+ # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
114
+ # in version control.
115
+ # https://pdm.fming.dev/latest/usage/project/#working-with-version-control
116
+ .pdm.toml
117
+ .pdm-python
118
+ .pdm-build/
119
+
120
+ # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
121
+ __pypackages__/
122
+
123
+ # Celery stuff
124
+ celerybeat-schedule
125
+ celerybeat.pid
126
+
127
+ # SageMath parsed files
128
+ *.sage.py
129
+
130
+ # Environments
131
+ .env
132
+ .venv
133
+ env/
134
+ venv/
135
+ ENV/
136
+ env.bak/
137
+ venv.bak/
138
+
139
+ # Spyder project settings
140
+ .spyderproject
141
+ .spyproject
142
+
143
+ # Rope project settings
144
+ .ropeproject
145
+
146
+ # mkdocs documentation
147
+ /site
148
+
149
+ # mypy
150
+ .mypy_cache/
151
+ .dmypy.json
152
+ dmypy.json
153
+
154
+ # Pyre type checker
155
+ .pyre/
156
+
157
+ # pytype static type analyzer
158
+ .pytype/
159
+
160
+ # Cython debug symbols
161
+ cython_debug/
162
+
163
+ # PyCharm
164
+ # JetBrains specific template is maintained in a separate JetBrains.gitignore that can
165
+ # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
166
+ # and can be added to the global gitignore or merged into this file. For a more nuclear
167
+ # option (not recommended) you can uncomment the following to ignore the entire idea folder.
168
+ #.idea/
169
+
170
+ # Ruff stuff:
171
+ .ruff_cache/
172
+
173
+ # PyPI configuration file
174
+ .pypirc
175
+
176
+ .DS_Store
177
+ client/muxi-implementation.md
178
+ data/*
@@ -0,0 +1,426 @@
1
+ Metadata-Version: 2.4
2
+ Name: faissx
3
+ Version: 0.0.2
4
+ Summary: High-performance vector database proxy using FAISS and ZeroMQ
5
+ Home-page: https://github.com/muxi-ai/faissx
6
+ Author: Ran Aroussi
7
+ Author-email: ran@aroussi.com
8
+ Classifier: Development Status :: 3 - Alpha
9
+ Classifier: Intended Audience :: Developers
10
+ Classifier: License :: OSI Approved :: Apache Software License
11
+ Classifier: Programming Language :: Python :: 3
12
+ Classifier: Programming Language :: Python :: 3.8
13
+ Classifier: Programming Language :: Python :: 3.9
14
+ Classifier: Programming Language :: Python :: 3.10
15
+ Classifier: Programming Language :: Python :: 3.11
16
+ Classifier: Operating System :: OS Independent
17
+ Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
18
+ Requires-Python: >=3.8
19
+ Description-Content-Type: text/markdown
20
+ License-File: LICENSE
21
+ Requires-Dist: faiss-cpu>=1.7.2
22
+ Requires-Dist: numpy>=1.19.5
23
+ Requires-Dist: pyzmq>=22.0.0
24
+ Requires-Dist: msgpack>=1.0.2
25
+ Provides-Extra: dev
26
+ Requires-Dist: pytest>=6.0.0; extra == "dev"
27
+ Requires-Dist: black>=21.5b2; extra == "dev"
28
+ Requires-Dist: isort>=5.9.1; extra == "dev"
29
+ Requires-Dist: mypy>=0.812; extra == "dev"
30
+ Dynamic: author
31
+ Dynamic: author-email
32
+ Dynamic: classifier
33
+ Dynamic: description
34
+ Dynamic: description-content-type
35
+ Dynamic: home-page
36
+ Dynamic: license-file
37
+ Dynamic: provides-extra
38
+ Dynamic: requires-dist
39
+ Dynamic: requires-python
40
+ Dynamic: summary
41
+
42
+ # FAISSx (FAISS Extended)
43
+
44
+ [![Python](https://img.shields.io/badge/python-3.8%2B-blue)](https://github.com/muxi-ai/faissx)
45
+ [![License](https://img.shields.io/badge/License-Apache%202.0-blue.svg)](https://opensource.org/licenses/Apache-2.0)
46
+  
47
+ [![Contributions Welcome](https://img.shields.io/badge/contributions-welcome-brightgreen.svg)](./CONTRIBUTING.md)
48
+
49
+ ### A high-performance vector database proxy built with FAISS and ZeroMQ, providing a drop-in replacement for FAISS with scalable, distributed vector operations.
50
+
51
+ ---
52
+
53
+ > [!TIP]
54
+ > #### Support this project by starring this repo on GitHub!
55
+ >
56
+ > More stars → more visibility → more contributors → better features → more robust tool for everyone 🎉
57
+ >
58
+ > <a href="https://github.com/muxi-ai/faissx" target="_blank"><img src="https://img.shields.io/github/stars/muxi-ai/faissx.svg?style=social&label=Star&maxAge=60" alt="Star this repo"></a>
59
+ >
60
+ > Thank you for your support! 🙏
61
+
62
+ ---
63
+
64
+ ## 📚 Table of Contents
65
+
66
+ - [Overview](#-overview)
67
+ - [Getting Started](#-getting-started)
68
+ - [Key Features](#-key-features)
69
+ - [Architecture](#-architecture)
70
+ - [Server Setup](#-server-setup)
71
+ - [Client Implementation](#-client-implementation)
72
+ - [Docker Deployment](#-docker-deployment)
73
+ - [Performance](#-performance)
74
+ - [Development](#-development)
75
+ - [Project Structure](#-project-structure)
76
+ - [License](#-license)
77
+
78
+ ## 👉 Overview
79
+
80
+ **FAISSx** is a lightweight, high-performance vector database proxy that runs [Facebook AI Similarity Search (FAISS)](https://github.com/facebookresearch/faiss) as a service. It provides a client-server architecture for efficient vector operations with significantly better performance than HTTP-based alternatives.
81
+
82
+ The client library acts as a true drop-in replacement for FAISS, meaning you can use it without changing your existing code - simply change your import statements and optionally configure remote execution. FAISSx seamlessly transitions between local FAISS execution and remote server operations based on your configuration.
83
+
84
+ FAISSx is designed for production workloads with multi-tenant support, authentication, and an efficient binary messaging protocol using ZeroMQ and msgpack serialization.
85
+
86
+ ---
87
+
88
+ ## 🚀 Getting Started
89
+
90
+ ### Installation
91
+
92
+ ```bash
93
+ # Install from PyPI
94
+ pip install faissx
95
+
96
+ # For development
97
+ git clone https://github.com/muxi-ai/faissx.git
98
+ cd faissx
99
+ pip install -e .
100
+ ```
101
+
102
+ ### Quick Start: Running FAISSx Server
103
+
104
+ ```bash
105
+ # Start the server with default settings
106
+ faissx.server run
107
+
108
+ # Start with custom options
109
+ faissx.server run --port 45678 --data-dir ./data --enable-auth --auth-keys "key1:tenant1,key2:tenant2"
110
+ ```
111
+
112
+ ### Quick Start: Using FAISSx Client
113
+
114
+ **1. Using FAISS locally - no configuration needed**
115
+
116
+ ```python
117
+ from faissx import client as faiss
118
+ import numpy as np
119
+
120
+ # Do FAISS stuff...
121
+ dimension = 128
122
+ index = faiss.IndexFlatL2(dimension)
123
+ vectors = np.random.rand(100, dimension).astype(np.float32)
124
+ index.add(vectors)
125
+ D, I = index.search(np.random.rand(1, dimension).astype(np.float32), k=5)
126
+ ```
127
+
128
+ **2. Using a remote FAISSx server**
129
+
130
+ ```python
131
+ from faissx import client as faiss
132
+ import numpy as np
133
+
134
+ # Connect to a remote FAISSx server
135
+ faiss.configure(
136
+ server="tcp://localhost:45678", # ZeroMQ server address
137
+ api_key="test-key-1", # API key for authentication
138
+ tenant_id="tenant-1" # Tenant ID for multi-tenant isolation
139
+ )
140
+
141
+ # All operations after configure() will use the remote server
142
+ index = faiss.IndexFlatL2(128)
143
+ vectors = np.random.rand(100, 128).astype(np.float32)
144
+ index.add(vectors)
145
+ D, I = index.search(np.random.rand(1, 128).astype(np.float32), k=5)
146
+ ```
147
+
148
+ ---
149
+
150
+ ## ✨ Key Features
151
+
152
+ | Feature | Description |
153
+ |---------|-------------|
154
+ | **📦 Drop-in replacement** | Use your existing FAISS code with minimal changes |
155
+ | **🔄 Binary protocol** | ZeroMQ and msgpack for efficient data transfer |
156
+ | **🌐 Multi-tenant support** | API key authentication for secure multi-tenant deployment |
157
+ | **📊 Vector operations** | Create indices, add vectors, and perform similarity searches |
158
+ | **🚀 High performance** | Significantly faster than HTTP-based alternatives |
159
+ | **📦 Persistent storage** | Optional persistence for vector indices |
160
+ | **🐳 Docker deployment** | Easy deployment with Docker images (Server) |
161
+
162
+ ---
163
+
164
+ ## 🏗️ Architecture
165
+
166
+ FAISSx follows a client-server architecture with high-performance binary communication:
167
+
168
+ ```mermaid
169
+ flowchart TD
170
+ Client[Client Application] --> ClientLib[FAISSx Client Library]
171
+ ClientLib --> ZMQ[ZeroMQ Connection]
172
+ ZMQ --> Server[FAISSx Server]
173
+ Server --> FAISS[FAISS Index Manager]
174
+ Server --> Auth[Authentication]
175
+ Server --> Storage[Storage]
176
+ ```
177
+
178
+ ### Components
179
+
180
+ 1. **Client Library**: Drop-in replacement for FAISS with remote execution capabilities
181
+ - Uses the same API as FAISS
182
+ - Implements local FAISS by default when unconfigured
183
+ - Supports remote execution when explicitly configured
184
+
185
+ 2. **ZeroMQ Communication**: High-performance binary messaging
186
+ - Zero-copy binary protocol
187
+ - Efficient msgpack serialization
188
+ - Low latency, persistent connections
189
+
190
+ 3. **Server**: Main service that handles client requests
191
+ - Multi-tenant support
192
+ - Authentication
193
+ - Vector index management
194
+
195
+ 4. **FAISS Index Manager**: Core component that handles vector operations
196
+ - Creates and manages multiple indices
197
+ - Performs vector addition and search operations
198
+ - Optimizes memory usage
199
+
200
+ ---
201
+
202
+ ## 🖥️ Server Setup
203
+
204
+ FAISSx server can be set up in multiple ways:
205
+
206
+ ### Python API
207
+
208
+ ```python
209
+ from faissx import server
210
+
211
+ server.configure(
212
+ port=45678, # default is 45678
213
+ bind_address="0.0.0.0", # default is "0.0.0.0"
214
+ data_dir="/data", # if omitted, faissx will use in-memory indices
215
+ auth_keys={"test-key-1": "tenant-1", "test-key-2": "tenant-2"}, # default is empty dict
216
+ enable_auth=True, # default is False
217
+ )
218
+
219
+ # Alternative: load API keys from a JSON file
220
+ # server.configure(
221
+ # port=45678,
222
+ # bind_address="0.0.0.0",
223
+ # auth_file="path/to/auth.json", # JSON file with API keys mapping
224
+ # enable_auth=True,
225
+ # )
226
+
227
+ server.run()
228
+ ```
229
+
230
+ ### Command-Line Interface
231
+
232
+ ```bash
233
+ # Start the server with default settings
234
+ faissx.server run
235
+
236
+ # Start with custom options
237
+ faissx.server run --port 45678 --data-dir ./data --enable-auth --auth-keys "key1:tenant1,key2:tenant2"
238
+
239
+ # Using authentication file instead of inline keys
240
+ faissx.server run --enable-auth --auth-file path/to/auth.json
241
+
242
+ # Show help
243
+ faissx.server run --help
244
+
245
+ # Show version
246
+ faissx.server --version
247
+ ```
248
+
249
+ Note: For authentication, you can provide API keys either inline with `--auth-keys` or from a JSON file with `--auth-file`. The JSON file should have the format `{"api_key1": "tenant1", "api_key2": "tenant2"}`. Only one authentication method can be used at a time.
250
+
251
+ ---
252
+
253
+ ## 📱 Client Implementation
254
+
255
+ The FAISSx client provides a true drop-in replacement for FAISS, with the ability to transparently use either local FAISS or a remote FAISSx server:
256
+
257
+ ### Local Mode (Default)
258
+
259
+ By default, FAISSx will use your local FAISS installation with no extra configuration required:
260
+
261
+ ```python
262
+ # Just change the import - everything else stays the same
263
+ from faissx import client as faiss
264
+ import numpy as np
265
+
266
+ # Create and use FAISS exactly as you would normally
267
+ dimension = 128
268
+ index = faiss.IndexFlatL2(dimension)
269
+ vectors = np.random.random((100, dimension)).astype('float32')
270
+ index.add(vectors)
271
+ D, I = index.search(np.random.random((1, dimension)).astype('float32'), k=5)
272
+ ```
273
+
274
+ ### Remote Mode
275
+
276
+ When you want to use the remote FAISSx server instead of local processing, just add a configure() call:
277
+
278
+ ```python
279
+ from faissx import client as faiss
280
+ import numpy as np
281
+
282
+ # Configure to use the remote server
283
+ faiss.configure(
284
+ server="tcp://your-server:45678",
285
+ api_key="your-api-key",
286
+ tenant_id="your-tenant-id"
287
+ )
288
+
289
+ # After configure(), all operations will use the remote server
290
+ dimension = 128
291
+ index = faiss.IndexFlatL2(dimension)
292
+ vectors = np.random.random((100, dimension)).astype('float32')
293
+ index.add(vectors)
294
+ D, I = index.search(np.random.random((1, dimension)).astype('float32'), k=5)
295
+ ```
296
+
297
+ **Note**: When you call `configure()`, all subsequent operations MUST use the remote server. If the server connection fails, operations will fail - there is no automatic fallback to local mode after `configure()` is called.
298
+
299
+ ### Environment Variables
300
+
301
+ You can configure the client using environment variables:
302
+
303
+ - `FAISSX_SERVER`: ZeroMQ server address (default: `tcp://localhost:45678`)
304
+ - `FAISSX_API_KEY`: API key for authentication
305
+ - `FAISSX_TENANT_ID`: Tenant ID for multi-tenant isolation
306
+
307
+ ---
308
+
309
+ ## 🐳 Docker Deployment
310
+
311
+ FAISSx provides Docker images for easy deployment:
312
+
313
+ ```bash
314
+ # Run with default settings
315
+ docker run -p 45678:45678 muxi/faissx:latest
316
+
317
+ # Run with persistent data and authentication
318
+ docker run -p 45678:45678 \
319
+ -v /path/to/data:/data \
320
+ -v /path/to/auth.json:/auth.json \
321
+ -e FAISSX_DATA_DIR=/data \
322
+ -e FAISSX_AUTH_FILE=/auth.json \
323
+ -e FAISSX_ENABLE_AUTH=true \
324
+ muxi/faissx:latest
325
+ ```
326
+
327
+ You can also use docker-compose:
328
+
329
+ ```bash
330
+ git clone https://github.com/muxi-ai/faissx.git
331
+ cd faissx
332
+ docker-compose up
333
+ ```
334
+
335
+ ---
336
+
337
+ ## 📊 Performance
338
+
339
+ The ZeroMQ-based implementation provides significant performance improvements over HTTP-based alternatives:
340
+
341
+ - Binary protocol minimizes serialization overhead
342
+ - Persistent connections reduce latency
343
+ - Efficient vector operations through direct numpy integration
344
+ - No JSON encoding/decoding overhead for large vector data
345
+
346
+ ---
347
+
348
+ ## 🛠️ Development
349
+
350
+ To set up a development environment:
351
+
352
+ ```bash
353
+ # Clone the repository
354
+ git clone https://github.com/muxi-ai/faissx.git
355
+ cd faissx
356
+
357
+ # Install in development mode with all dependencies
358
+ pip install -e ".[dev]"
359
+
360
+ # Run tests
361
+ pytest
362
+
363
+ # Run examples
364
+ python examples/server_example.py
365
+ ```
366
+
367
+ ### Running Client Tests
368
+
369
+ To run tests for the client component:
370
+
371
+ ```bash
372
+ cd client
373
+ ./run_tests.sh
374
+ ```
375
+
376
+ ### Docker Development
377
+
378
+ To build the Docker images:
379
+
380
+ ```bash
381
+ cd server
382
+ ./build_docker.sh
383
+ ```
384
+
385
+ ---
386
+
387
+ ## 📁 Project Structure
388
+
389
+ ```
390
+ /faissx - Python package source code
391
+ /faissx/server - Server implementation
392
+ /faissx/client - Client library implementation
393
+ /server - Server utilities, docker configs, tests
394
+ /client - Client utilities and tests
395
+ /examples - Example code for both client and server
396
+ /data - Default directory for FAISS data files
397
+ ```
398
+
399
+ ---
400
+
401
+ ## 📄 License
402
+
403
+ FAISSx is licensed under the [Apache 2.0 license](./LICENSE).
404
+
405
+ ### Why Apache 2.0?
406
+
407
+ I chose the Apache 2.0 license to make FAISSx easy to adopt, integrate, and build on. This license:
408
+
409
+ - Allows you to freely use, modify, and distribute the library in both open-source and proprietary software
410
+ - Encourages wide adoption by individuals, startups, and enterprises alike
411
+ - Includes a clear patent grant for legal peace of mind
412
+ - Enables flexible usage without the complexity of copyleft restrictions
413
+
414
+ ---
415
+
416
+ ## 🙏 Thank You
417
+
418
+ Thank you for trying out FAISSx! Your interest and support mean a lot to this project. Whether you're using it in your applications or just exploring the capabilities, your participation helps drive this project forward.
419
+
420
+ If you find FAISSx useful in your work:
421
+
422
+ - Consider starring the repository on GitHub
423
+ - Share your experiences or use cases with the community
424
+ - Let me know how I can make it better for your needs
425
+
426
+ ~ **Ran Aroussi**