pgvectordb 0.0.5__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pgvectordb-0.0.5/.github/workflows/ci.yml +54 -0
- pgvectordb-0.0.5/.github/workflows/docs.yml +33 -0
- pgvectordb-0.0.5/.github/workflows/publish.yml +34 -0
- pgvectordb-0.0.5/.gitignore +46 -0
- pgvectordb-0.0.5/CHANGELOG.md +150 -0
- pgvectordb-0.0.5/LICENSE +21 -0
- pgvectordb-0.0.5/PKG-INFO +367 -0
- pgvectordb-0.0.5/README.md +302 -0
- pgvectordb-0.0.5/TODO.md +236 -0
- pgvectordb-0.0.5/config/.env.example +88 -0
- pgvectordb-0.0.5/docker/Dockerfile +34 -0
- pgvectordb-0.0.5/docker/README.md +44 -0
- pgvectordb-0.0.5/docker/docker-compose.yml +26 -0
- pgvectordb-0.0.5/docker/init.sql +8 -0
- pgvectordb-0.0.5/docs/advanced/configuration.md +88 -0
- pgvectordb-0.0.5/docs/advanced/indexing.md +276 -0
- pgvectordb-0.0.5/docs/api_reference/config.md +3 -0
- pgvectordb-0.0.5/docs/api_reference/exceptions.md +3 -0
- pgvectordb-0.0.5/docs/api_reference/metrics.md +3 -0
- pgvectordb-0.0.5/docs/api_reference/pgvectordb.md +3 -0
- pgvectordb-0.0.5/docs/api_reference/rerankers.md +3 -0
- pgvectordb-0.0.5/docs/api_reference/spaces.md +3 -0
- pgvectordb-0.0.5/docs/getting_started/core_concepts.md +211 -0
- pgvectordb-0.0.5/docs/getting_started/installation.md +98 -0
- pgvectordb-0.0.5/docs/getting_started/quickstart.md +164 -0
- pgvectordb-0.0.5/docs/index.md +75 -0
- pgvectordb-0.0.5/docs/user_guide/analytics_and_diagnostics.md +313 -0
- pgvectordb-0.0.5/docs/user_guide/embeddings_and_spaces.md +243 -0
- pgvectordb-0.0.5/docs/user_guide/filtering.md +152 -0
- pgvectordb-0.0.5/docs/user_guide/langchain_integration.md +107 -0
- pgvectordb-0.0.5/docs/user_guide/metrics_and_evaluation.md +102 -0
- pgvectordb-0.0.5/docs/user_guide/multimodal_search.md +308 -0
- pgvectordb-0.0.5/docs/user_guide/reranking.md +226 -0
- pgvectordb-0.0.5/docs/user_guide/search_and_retrieval.md +276 -0
- pgvectordb-0.0.5/docs/user_guide/vector_store.md +325 -0
- pgvectordb-0.0.5/eval/data/benchmark_dataset_1k.json +10655 -0
- pgvectordb-0.0.5/eval/results/benchmark_results.csv +21 -0
- pgvectordb-0.0.5/eval/results/benchmark_results.json +242 -0
- pgvectordb-0.0.5/eval/results/k_value_analysis_results.json +90 -0
- pgvectordb-0.0.5/eval/scripts/benchmark_all_methods.py +746 -0
- pgvectordb-0.0.5/eval/scripts/generate_synthetic_dataset.py +390 -0
- pgvectordb-0.0.5/eval/scripts/optimize_k.py +1 -3
- pgvectordb-0.0.5/eval/scripts/test_metrics_correctness.py +106 -0
- pgvectordb-0.0.5/examples/01_quickstart.ipynb +465 -0
- pgvectordb-0.0.5/examples/02_advanced_search.ipynb +519 -0
- pgvectordb-0.0.5/examples/03_multimodal_search.ipynb +514 -0
- pgvectordb-0.0.5/examples/04_storage_optimization.ipynb +399 -0
- pgvectordb-0.0.5/examples/05_rag_evaluation.ipynb +297 -0
- pgvectordb-0.0.5/examples/demo.py +272 -0
- pgvectordb-0.0.5/examples/product_search.py +301 -0
- pgvectordb-0.0.5/examples/real_estate_nlq.py +328 -0
- pgvectordb-0.0.5/mkdocs.yml +98 -0
- pgvectordb-0.0.5/notebooks/demo.ipynb +3846 -0
- pgvectordb-0.0.5/notebooks/eval_demo.ipynb +812 -0
- pgvectordb-0.0.5/pgvectordb/__init__.py +193 -0
- pgvectordb-0.0.5/pgvectordb/base.py +478 -0
- pgvectordb-0.0.5/pgvectordb/config.py +192 -0
- pgvectordb-0.0.5/pgvectordb/core.py +483 -0
- pgvectordb-0.0.5/pgvectordb/extensions.py +476 -0
- pgvectordb-0.0.5/pgvectordb/metrics.py +938 -0
- pgvectordb-0.0.5/pgvectordb/mixins/__init__.py +22 -0
- pgvectordb-0.0.5/pgvectordb/mixins/analytics.py +994 -0
- pgvectordb-0.0.5/pgvectordb/mixins/documents.py +1025 -0
- pgvectordb-0.0.5/pgvectordb/mixins/indexing.py +1153 -0
- pgvectordb-0.0.5/pgvectordb/mixins/integrations.py +116 -0
- pgvectordb-0.0.5/pgvectordb/mixins/multimodal.py +831 -0
- pgvectordb-0.0.5/pgvectordb/mixins/storage.py +378 -0
- pgvectordb-0.0.5/pgvectordb/py.typed +1 -0
- pgvectordb-0.0.5/pgvectordb/rerankers.py +637 -0
- pgvectordb-0.0.5/pgvectordb/schema.py +393 -0
- pgvectordb-0.0.5/pgvectordb/search.py +1023 -0
- pgvectordb-0.0.5/pgvectordb/spaces.py +955 -0
- pgvectordb-0.0.5/pyproject.toml +122 -0
- pgvectordb-0.0.5/requirements.txt +18 -0
- pgvectordb-0.0.5/scripts/test_connection.py +435 -0
- pgvectordb-0.0.5/site/404.html +1321 -0
- pgvectordb-0.0.5/site/advanced/configuration/index.html +1689 -0
- pgvectordb-0.0.5/site/advanced/indexing/index.html +2341 -0
- pgvectordb-0.0.5/site/api_reference/config/index.html +2306 -0
- pgvectordb-0.0.5/site/api_reference/exceptions/index.html +3580 -0
- pgvectordb-0.0.5/site/api_reference/metrics/index.html +5715 -0
- pgvectordb-0.0.5/site/api_reference/pgvectordb/index.html +3070 -0
- pgvectordb-0.0.5/site/api_reference/rerankers/index.html +4958 -0
- pgvectordb-0.0.5/site/api_reference/spaces/index.html +8548 -0
- pgvectordb-0.0.5/site/assets/_mkdocstrings.css +237 -0
- pgvectordb-0.0.5/site/assets/images/favicon.png +0 -0
- pgvectordb-0.0.5/site/assets/javascripts/bundle.79ae519e.min.js +16 -0
- pgvectordb-0.0.5/site/assets/javascripts/bundle.79ae519e.min.js.map +7 -0
- pgvectordb-0.0.5/site/assets/javascripts/lunr/min/lunr.ar.min.js +1 -0
- pgvectordb-0.0.5/site/assets/javascripts/lunr/min/lunr.da.min.js +18 -0
- pgvectordb-0.0.5/site/assets/javascripts/lunr/min/lunr.de.min.js +18 -0
- pgvectordb-0.0.5/site/assets/javascripts/lunr/min/lunr.du.min.js +18 -0
- pgvectordb-0.0.5/site/assets/javascripts/lunr/min/lunr.el.min.js +1 -0
- pgvectordb-0.0.5/site/assets/javascripts/lunr/min/lunr.es.min.js +18 -0
- pgvectordb-0.0.5/site/assets/javascripts/lunr/min/lunr.fi.min.js +18 -0
- pgvectordb-0.0.5/site/assets/javascripts/lunr/min/lunr.fr.min.js +18 -0
- pgvectordb-0.0.5/site/assets/javascripts/lunr/min/lunr.he.min.js +1 -0
- pgvectordb-0.0.5/site/assets/javascripts/lunr/min/lunr.hi.min.js +1 -0
- pgvectordb-0.0.5/site/assets/javascripts/lunr/min/lunr.hu.min.js +18 -0
- pgvectordb-0.0.5/site/assets/javascripts/lunr/min/lunr.hy.min.js +1 -0
- pgvectordb-0.0.5/site/assets/javascripts/lunr/min/lunr.it.min.js +18 -0
- pgvectordb-0.0.5/site/assets/javascripts/lunr/min/lunr.ja.min.js +1 -0
- pgvectordb-0.0.5/site/assets/javascripts/lunr/min/lunr.jp.min.js +1 -0
- pgvectordb-0.0.5/site/assets/javascripts/lunr/min/lunr.kn.min.js +1 -0
- pgvectordb-0.0.5/site/assets/javascripts/lunr/min/lunr.ko.min.js +1 -0
- pgvectordb-0.0.5/site/assets/javascripts/lunr/min/lunr.multi.min.js +1 -0
- pgvectordb-0.0.5/site/assets/javascripts/lunr/min/lunr.nl.min.js +18 -0
- pgvectordb-0.0.5/site/assets/javascripts/lunr/min/lunr.no.min.js +18 -0
- pgvectordb-0.0.5/site/assets/javascripts/lunr/min/lunr.pt.min.js +18 -0
- pgvectordb-0.0.5/site/assets/javascripts/lunr/min/lunr.ro.min.js +18 -0
- pgvectordb-0.0.5/site/assets/javascripts/lunr/min/lunr.ru.min.js +18 -0
- pgvectordb-0.0.5/site/assets/javascripts/lunr/min/lunr.sa.min.js +1 -0
- pgvectordb-0.0.5/site/assets/javascripts/lunr/min/lunr.stemmer.support.min.js +1 -0
- pgvectordb-0.0.5/site/assets/javascripts/lunr/min/lunr.sv.min.js +18 -0
- pgvectordb-0.0.5/site/assets/javascripts/lunr/min/lunr.ta.min.js +1 -0
- pgvectordb-0.0.5/site/assets/javascripts/lunr/min/lunr.te.min.js +1 -0
- pgvectordb-0.0.5/site/assets/javascripts/lunr/min/lunr.th.min.js +1 -0
- pgvectordb-0.0.5/site/assets/javascripts/lunr/min/lunr.tr.min.js +18 -0
- pgvectordb-0.0.5/site/assets/javascripts/lunr/min/lunr.vi.min.js +1 -0
- pgvectordb-0.0.5/site/assets/javascripts/lunr/min/lunr.zh.min.js +1 -0
- pgvectordb-0.0.5/site/assets/javascripts/lunr/tinyseg.js +206 -0
- pgvectordb-0.0.5/site/assets/javascripts/lunr/wordcut.js +6708 -0
- pgvectordb-0.0.5/site/assets/javascripts/workers/search.2c215733.min.js +42 -0
- pgvectordb-0.0.5/site/assets/javascripts/workers/search.2c215733.min.js.map +7 -0
- pgvectordb-0.0.5/site/assets/stylesheets/main.484c7ddc.min.css +1 -0
- pgvectordb-0.0.5/site/assets/stylesheets/main.484c7ddc.min.css.map +1 -0
- pgvectordb-0.0.5/site/assets/stylesheets/palette.ab4e12ef.min.css +1 -0
- pgvectordb-0.0.5/site/assets/stylesheets/palette.ab4e12ef.min.css.map +1 -0
- pgvectordb-0.0.5/site/getting_started/core_concepts/index.html +1864 -0
- pgvectordb-0.0.5/site/getting_started/installation/index.html +1638 -0
- pgvectordb-0.0.5/site/getting_started/quickstart/index.html +1747 -0
- pgvectordb-0.0.5/site/index.html +1726 -0
- pgvectordb-0.0.5/site/objects.inv +0 -0
- pgvectordb-0.0.5/site/sitemap.xml +3 -0
- pgvectordb-0.0.5/site/sitemap.xml.gz +0 -0
- pgvectordb-0.0.5/site/user_guide/analytics_and_diagnostics/index.html +2203 -0
- pgvectordb-0.0.5/site/user_guide/embeddings_and_spaces/index.html +2017 -0
- pgvectordb-0.0.5/site/user_guide/filtering/index.html +2103 -0
- pgvectordb-0.0.5/site/user_guide/langchain_integration/index.html +1633 -0
- pgvectordb-0.0.5/site/user_guide/metrics_and_evaluation/index.html +1665 -0
- pgvectordb-0.0.5/site/user_guide/multimodal_search/index.html +2101 -0
- pgvectordb-0.0.5/site/user_guide/reranking/index.html +1968 -0
- pgvectordb-0.0.5/site/user_guide/search_and_retrieval/index.html +2493 -0
- pgvectordb-0.0.5/site/user_guide/vector_store/index.html +2340 -0
- pgvectordb-0.0.5/test/conftest.py +262 -0
- pgvectordb-0.0.5/test/test_analytics.py +278 -0
- pgvectordb-0.0.5/test/test_base.py +316 -0
- pgvectordb-0.0.5/test/test_bedrock_standalone.py +228 -0
- pgvectordb-0.0.5/test/test_documents.py +262 -0
- pgvectordb-0.0.5/test/test_extensions.py +159 -0
- pgvectordb-0.0.5/test/test_indexing.py +279 -0
- pgvectordb-0.0.5/test/test_integrations.py +170 -0
- pgvectordb-0.0.5/test/test_multimodal.py +247 -0
- pgvectordb-0.0.5/test/test_recency_space.py +267 -0
- pgvectordb-0.0.5/test/test_search.py +372 -0
- pgvectordb-0.0.5/test/test_security.py +383 -0
- pgvectordb-0.0.5/test/test_storage.py +211 -0
- pgvectordb-0.0.5/uv.lock +5444 -0
|
@@ -0,0 +1,54 @@
|
|
|
1
|
+
name: CI
|
|
2
|
+
|
|
3
|
+
on:
|
|
4
|
+
push:
|
|
5
|
+
branches: [main]
|
|
6
|
+
pull_request:
|
|
7
|
+
branches: [main]
|
|
8
|
+
|
|
9
|
+
jobs:
|
|
10
|
+
lint-and-check:
|
|
11
|
+
name: Lint & Syntax Check
|
|
12
|
+
runs-on: ubuntu-latest
|
|
13
|
+
strategy:
|
|
14
|
+
matrix:
|
|
15
|
+
python-version: ["3.10", "3.12", "3.13"]
|
|
16
|
+
|
|
17
|
+
steps:
|
|
18
|
+
- name: Checkout repository
|
|
19
|
+
uses: actions/checkout@v4
|
|
20
|
+
|
|
21
|
+
- name: Set up Python ${{ matrix.python-version }}
|
|
22
|
+
uses: actions/setup-python@v5
|
|
23
|
+
with:
|
|
24
|
+
python-version: ${{ matrix.python-version }}
|
|
25
|
+
|
|
26
|
+
- name: Install dependencies
|
|
27
|
+
run: |
|
|
28
|
+
python -m pip install --upgrade pip
|
|
29
|
+
pip install ruff
|
|
30
|
+
|
|
31
|
+
- name: Lint with ruff
|
|
32
|
+
run: ruff check pgvectordb/
|
|
33
|
+
|
|
34
|
+
- name: Syntax check all Python files
|
|
35
|
+
run: |
|
|
36
|
+
python -c "
|
|
37
|
+
import ast, pathlib, sys
|
|
38
|
+
errors = []
|
|
39
|
+
for p in pathlib.Path('pgvectordb').rglob('*.py'):
|
|
40
|
+
try:
|
|
41
|
+
ast.parse(p.read_text(encoding='utf-8'), str(p))
|
|
42
|
+
except SyntaxError as e:
|
|
43
|
+
errors.append(f'SYNTAX ERROR in {p}: {e}')
|
|
44
|
+
if errors:
|
|
45
|
+
for e in errors:
|
|
46
|
+
print(e, file=sys.stderr)
|
|
47
|
+
sys.exit(1)
|
|
48
|
+
print(f'All {sum(1 for _ in pathlib.Path(\"pgvectordb\").rglob(\"*.py\"))} files OK')
|
|
49
|
+
"
|
|
50
|
+
|
|
51
|
+
- name: Verify package builds
|
|
52
|
+
run: |
|
|
53
|
+
pip install hatch
|
|
54
|
+
hatch build
|
|
@@ -0,0 +1,33 @@
|
|
|
1
|
+
name: docs
|
|
2
|
+
on:
|
|
3
|
+
push:
|
|
4
|
+
branches:
|
|
5
|
+
- main
|
|
6
|
+
pull_request:
|
|
7
|
+
branches:
|
|
8
|
+
- main
|
|
9
|
+
workflow_dispatch:
|
|
10
|
+
|
|
11
|
+
permissions:
|
|
12
|
+
contents: write
|
|
13
|
+
|
|
14
|
+
jobs:
|
|
15
|
+
deploy:
|
|
16
|
+
runs-on: ubuntu-latest
|
|
17
|
+
steps:
|
|
18
|
+
- uses: actions/checkout@v4
|
|
19
|
+
with:
|
|
20
|
+
fetch-depth: 0
|
|
21
|
+
|
|
22
|
+
- name: Install uv
|
|
23
|
+
uses: astral-sh/setup-uv@v5
|
|
24
|
+
|
|
25
|
+
- name: Set up Python
|
|
26
|
+
run: uv python install 3.10
|
|
27
|
+
|
|
28
|
+
- name: Install dependencies
|
|
29
|
+
run: uv sync --group docs
|
|
30
|
+
|
|
31
|
+
- name: Deploy documentation
|
|
32
|
+
if: github.event_name == 'push' && github.ref == 'refs/heads/main'
|
|
33
|
+
run: uv run mkdocs gh-deploy --force
|
|
@@ -0,0 +1,34 @@
|
|
|
1
|
+
name: Publish to PyPI
|
|
2
|
+
|
|
3
|
+
on:
|
|
4
|
+
release:
|
|
5
|
+
types: [published]
|
|
6
|
+
workflow_dispatch:
|
|
7
|
+
|
|
8
|
+
jobs:
|
|
9
|
+
pypi-publish:
|
|
10
|
+
name: Build and publish Python distribution to PyPI
|
|
11
|
+
runs-on: ubuntu-latest
|
|
12
|
+
environment:
|
|
13
|
+
name: pypi
|
|
14
|
+
url: https://pypi.org/p/pgvectordb
|
|
15
|
+
permissions:
|
|
16
|
+
id-token: write # IMPORTANT: mandatory for trusted publishing
|
|
17
|
+
contents: read
|
|
18
|
+
steps:
|
|
19
|
+
- name: Checkout repository
|
|
20
|
+
uses: actions/checkout@v4
|
|
21
|
+
|
|
22
|
+
- name: Set up Python
|
|
23
|
+
uses: actions/setup-python@v5
|
|
24
|
+
with:
|
|
25
|
+
python-version: "3.10"
|
|
26
|
+
|
|
27
|
+
- name: Install hatch
|
|
28
|
+
run: python -m pip install --upgrade pip hatch
|
|
29
|
+
|
|
30
|
+
- name: Build a binary wheel and a source tarball
|
|
31
|
+
run: hatch build
|
|
32
|
+
|
|
33
|
+
- name: Publish distribution to PyPI
|
|
34
|
+
uses: pypa/gh-action-pypi-publish@release/v1
|
|
@@ -0,0 +1,46 @@
|
|
|
1
|
+
# Python
|
|
2
|
+
__pycache__/
|
|
3
|
+
*.py[cod]
|
|
4
|
+
*.so
|
|
5
|
+
*.egg-info/
|
|
6
|
+
.pytest_cache/
|
|
7
|
+
|
|
8
|
+
# Ruff Cache
|
|
9
|
+
.ruff_cache/
|
|
10
|
+
|
|
11
|
+
# Virtual environments
|
|
12
|
+
venv/
|
|
13
|
+
env/
|
|
14
|
+
ai_env/
|
|
15
|
+
.venv/
|
|
16
|
+
|
|
17
|
+
# IDE
|
|
18
|
+
.vscode/
|
|
19
|
+
.idea/
|
|
20
|
+
*.swp
|
|
21
|
+
|
|
22
|
+
# Environment
|
|
23
|
+
.env
|
|
24
|
+
|
|
25
|
+
# Jupyter
|
|
26
|
+
.ipynb_checkpoints/
|
|
27
|
+
|
|
28
|
+
# Data
|
|
29
|
+
*.db
|
|
30
|
+
*.sqlite
|
|
31
|
+
*.json
|
|
32
|
+
*.csv
|
|
33
|
+
*.pkl
|
|
34
|
+
|
|
35
|
+
# Logs
|
|
36
|
+
*.log
|
|
37
|
+
|
|
38
|
+
# OS
|
|
39
|
+
.DS_Store
|
|
40
|
+
Thumbs.db
|
|
41
|
+
|
|
42
|
+
# Docs
|
|
43
|
+
|
|
44
|
+
# Allow eval folder
|
|
45
|
+
!eval/
|
|
46
|
+
!eval/**/*
|
|
@@ -0,0 +1,150 @@
|
|
|
1
|
+
# Changelog
|
|
2
|
+
|
|
3
|
+
All notable changes to pgVectorDB are documented here.
|
|
4
|
+
This project adheres to [Semantic Versioning](https://semver.org/).
|
|
5
|
+
|
|
6
|
+
---
|
|
7
|
+
|
|
8
|
+
## [0.0.5] — 2026-05-09 (PyPI Release Candidate)
|
|
9
|
+
|
|
10
|
+
### Security
|
|
11
|
+
- **Fixed** `set_maintenance_work_mem` — added regex allowlist validation
|
|
12
|
+
(`^\d+\s*(kB|MB|GB|TB)?$`) to prevent SQL injection via the memory value string.
|
|
13
|
+
- **Fixed** `set_parallel_workers` — added `int()` coercion and non-negative
|
|
14
|
+
bounds check on `gather` and `maintenance` parameters.
|
|
15
|
+
|
|
16
|
+
### Bug Fixes
|
|
17
|
+
- **Fixed** `vacuum_analyze` — VACUUM was being executed inside an implicit
|
|
18
|
+
SQLAlchemy transaction, which PostgreSQL rejects. Now issues an explicit
|
|
19
|
+
`COMMIT` before running `VACUUM ANALYZE` / `VACUUM FULL ANALYZE`.
|
|
20
|
+
- **Fixed** `MIN_PG_TEXTSEARCH_VERSION` mismatch — `extensions.py` had `1.0.0`
|
|
21
|
+
while `config.py` had `0.4.0` (correct). Aligned both to `0.4.0` with a
|
|
22
|
+
clarifying comment.
|
|
23
|
+
|
|
24
|
+
### Performance
|
|
25
|
+
- **Fixed** `update_metadata` — replaced N+1 SELECT + UPDATE loop with a single
|
|
26
|
+
bulk `UPDATE … SET langchain_metadata = COALESCE(…, '{}') || :updates WHERE
|
|
27
|
+
langchain_id = ANY(:ids)`. Reduces database round-trips from 2N (a SELECT plus an UPDATE per document) to 1.
|
|
28
|
+
- **Fixed** `upsert_documents` — hoisted the database connection outside the
|
|
29
|
+
per-document loop. Previously a new connection was opened for every document
|
|
30
|
+
and again for the content-hash update step, risking connection pool exhaustion
|
|
31
|
+
on large batches.
|
|
32
|
+
|
|
33
|
+
### Packaging
|
|
34
|
+
- **Removed** `psycopg2-binary` from core dependencies — already transitively
|
|
35
|
+
required by `langchain-postgres`; explicit listing caused "binary wheel in
|
|
36
|
+
production" warnings.
|
|
37
|
+
- **Removed** `nest-asyncio` from core dependencies — Jupyter-only convenience
|
|
38
|
+
library not appropriate for production. Added as a new `[jupyter]` optional
|
|
39
|
+
extra.
|
|
40
|
+
- **Removed** `flake8` from dev dependencies — `ruff` covers all lint rules and
|
|
41
|
+
is already configured.
|
|
42
|
+
- **Fixed** `ruff==0.15.2` — version does not exist on PyPI. Changed to
|
|
43
|
+
`ruff>=0.4.0`.
|
|
44
|
+
|
|
45
|
+
### Documentation
|
|
46
|
+
- Bumped version to `0.0.5` in `__init__.py` docstring and `__version__`.
|
|
47
|
+
- Updated `README.md`: correct package layout (`pgvectordb/` not `src/`),
|
|
48
|
+
added `pip install pgvectordb` PyPI quickstart, updated method count to 60+.
|
|
49
|
+
- Added this `CHANGELOG.md`.
|
|
50
|
+
|
|
51
|
+
---
|
|
52
|
+
|
|
53
|
+
## [0.0.4] — 2026-05-09
|
|
54
|
+
|
|
55
|
+
### Features
|
|
56
|
+
- **Binary quantization index** (`build_index_binary_quantized`) — creates a
|
|
57
|
+
Hamming-distance HNSW index on `binary_quantize(embedding)` for 87.5%
|
|
58
|
+
storage savings.
|
|
59
|
+
- **Two-stage binary search** (`search_with_binary_rerank`) — fast Hamming
|
|
60
|
+
retrieval followed by full-vector cosine re-ranking.
|
|
61
|
+
- **Subvector indexing** (`build_index_with_subvectors`) and **subvector
|
|
62
|
+
reranking** (`search_with_subvector_rerank`) — supports Matryoshka embeddings
|
|
63
|
+
(OpenAI `text-embedding-3`, Nomic).
|
|
64
|
+
- **Concurrent index builds** (`build_index_concurrent`) — `CREATE INDEX
|
|
65
|
+
CONCURRENTLY` for zero-downtime index creation or replacement.
|
|
66
|
+
- **Index build progress** (`get_index_build_progress`) — polls
|
|
67
|
+
`pg_stat_progress_create_index` for live feedback.
|
|
68
|
+
- **Batch error isolation** (`add_documents_batch_isolated`) — each batch is
|
|
69
|
+
committed independently; failures are reported, not raised by default.
|
|
70
|
+
- **Content-hash upsert** (`upsert_documents`) — deduplicates by MD5 of
|
|
71
|
+
`page_content` using an optional `content_hash` column.
|
|
72
|
+
- **Slow query monitoring** (`get_slow_queries`) — queries
|
|
73
|
+
`pg_stat_statements` for the slowest vector/embedding operations.
|
|
74
|
+
- **BM25 parallel build hint** — `build_bm25_index` accepts
|
|
75
|
+
`max_parallel_maintenance_workers` for faster index creation.
|
|
76
|
+
- **Metadata GIN index** (`create_metadata_index`) — creates `gin_trgm_ops`
|
|
77
|
+
indexes on JSONB metadata fields for fast text filtering.
|
|
78
|
+
- **SQLAlchemy inspector** (`_index_exists`) — uses `run_sync(inspect)` for
|
|
79
|
+
robust index existence checks with a `pg_indexes` fallback.
|
|
80
|
+
|
|
81
|
+
### Bug Fixes
|
|
82
|
+
- Resolved `PostgresSyntaxError` in `search_with_binary_rerank` caused by
|
|
83
|
+
asyncpg misinterpreting `::vector(N)` in named-parameter queries. Embedding
|
|
84
|
+
is now interpolated as a validated float-list literal.
|
|
85
|
+
|
|
86
|
+
---
|
|
87
|
+
|
|
88
|
+
## [0.0.3] — 2026-02-20
|
|
89
|
+
|
|
90
|
+
### Features
|
|
91
|
+
- **Rerankers module** (`pgvectordb/rerankers.py`) with four backends:
|
|
92
|
+
- `CrossEncoderReranker` (sentence-transformers, local)
|
|
93
|
+
- `CohereReranker` (Cohere Rerank API)
|
|
94
|
+
- `AWSBedrockReranker` (Amazon Bedrock `amazon.rerank-v1:0`)
|
|
95
|
+
- `HuggingFaceReranker` (transformers text-classification pipeline)
|
|
96
|
+
- `create_reranker` factory function
|
|
97
|
+
- **Vector spaces module** (`pgvectordb/spaces.py`) for multimodal search:
|
|
98
|
+
- `TextSpace` — dense embeddings from any LangChain model
|
|
99
|
+
- `NumberSpace` — min-max normalized numeric fields (min/max/similar modes)
|
|
100
|
+
- `CategorySpace` — one-hot categorical encoding with optional negative filter
|
|
101
|
+
- `RecencySpace` — exponential time-decay for timestamp fields
|
|
102
|
+
|
|
103
|
+
### Refactoring
|
|
104
|
+
- Modularized main class into focused mixins under `pgvectordb/mixins/`:
|
|
105
|
+
`DocumentsMixin`, `IndexingMixin`, `AnalyticsMixin`, `StorageMixin`,
|
|
106
|
+
`MultimodalMixin`.
|
|
107
|
+
|
|
108
|
+
---
|
|
109
|
+
|
|
110
|
+
## [0.0.2] — 2026-02-20
|
|
111
|
+
|
|
112
|
+
### Features
|
|
113
|
+
- **Half-precision storage** (`create_halfvec_table`) — `halfvec` type
|
|
114
|
+
(2 bytes/dim, 50% savings).
|
|
115
|
+
- **Sparse vector storage** (`create_sparsevec_table`) — `sparsevec` type
|
|
116
|
+
for TF-IDF / one-hot data.
|
|
117
|
+
- **Label filtering** for DiskANN — `add_documents` accepts `labels` for
|
|
118
|
+
partition-based filtered search.
|
|
119
|
+
- **Export / import** (`export_to_json`, `import_from_json`).
|
|
120
|
+
- **Metadata update** (`update_metadata`) — bulk metadata patching.
|
|
121
|
+
- **Document update** (`aupdate_documents`) — in-place content + embedding
|
|
122
|
+
update with optional `update_embeddings=False` for metadata-only changes.
|
|
123
|
+
- **Embedding fallback** (`_embed_documents_with_fallback`) — on batch
|
|
124
|
+
embedding failure falls back to per-document embedding; rate-limit errors
|
|
125
|
+
raise `RateLimitError` immediately.
|
|
126
|
+
- **Iterative scan** (`set_iterative_scan`) — configures HNSW/IVFFlat
|
|
127
|
+
iterative scanning for better recall on filtered queries.
|
|
128
|
+
- **Centroid computation** (`compute_centroid`) — average embedding for a
|
|
129
|
+
filtered or full collection.
|
|
130
|
+
- **Label definitions table** (`create_label_definitions`,
|
|
131
|
+
`get_label_ids_by_names`).
|
|
132
|
+
|
|
133
|
+
---
|
|
134
|
+
|
|
135
|
+
## [0.0.1] — Initial Release
|
|
136
|
+
|
|
137
|
+
- `pgVectorDB` core class with HNSW, IVFFlat, and DiskANN index support.
|
|
138
|
+
- 10 search methods, including `semantic_search`, `keyword_search` (FTS + BM25),
|
|
139
|
+
`hybrid_search`, `ensemble_search`, `trigram_search`,
|
|
140
|
+
`metadata_semantic_search`, `metadata_keyword_search`,
|
|
141
|
+
`metadata_trigram_search`.
|
|
142
|
+
- `ExtensionManager` with graceful degradation for optional extensions.
|
|
143
|
+
- BM25 index (`build_bm25_index`) using `pg_textsearch`.
|
|
144
|
+
- DiskANN build parameter tuning (`set_diskann_build_params`).
|
|
145
|
+
- Query parameter tuning (`set_query_params`) — `hnsw.ef_search`,
|
|
146
|
+
`ivfflat.probes`, `diskann.query_search_list_size`, etc.
|
|
147
|
+
- Analytics: `get_stats`, `get_index_stats`, `explain_query`,
|
|
148
|
+
`benchmark_search_methods`, `validate_collection`, `compute_recall`.
|
|
149
|
+
- Maintenance: `vacuum_analyze`, `areindex`, `adrop_vector_index`.
|
|
150
|
+
- Configuration system (`pgvectordb/config.py`) with `.env` support.
|
pgvectordb-0.0.5/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2025-2026
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|