anywidget-vector 0.2.0__tar.gz → 0.2.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- anywidget_vector-0.2.1/.github/workflows/ci.yml +71 -0
- anywidget_vector-0.2.1/.github/workflows/pypi.yml +77 -0
- {anywidget_vector-0.2.0 → anywidget_vector-0.2.1}/PKG-INFO +3 -3
- anywidget_vector-0.2.1/examples/demo.py +274 -0
- {anywidget_vector-0.2.0 → anywidget_vector-0.2.1}/pyproject.toml +3 -3
- {anywidget_vector-0.2.0 → anywidget_vector-0.2.1}/src/anywidget_vector/__init__.py +1 -1
- anywidget_vector-0.2.1/src/anywidget_vector/backends/__init__.py +103 -0
- anywidget_vector-0.2.1/src/anywidget_vector/backends/chroma/__init__.py +27 -0
- anywidget_vector-0.2.1/src/anywidget_vector/backends/chroma/client.py +60 -0
- anywidget_vector-0.2.1/src/anywidget_vector/backends/chroma/converter.py +86 -0
- anywidget_vector-0.2.1/src/anywidget_vector/backends/grafeo/__init__.py +20 -0
- anywidget_vector-0.2.1/src/anywidget_vector/backends/grafeo/client.py +33 -0
- anywidget_vector-0.2.1/src/anywidget_vector/backends/grafeo/converter.py +46 -0
- anywidget_vector-0.2.1/src/anywidget_vector/backends/lancedb/__init__.py +22 -0
- anywidget_vector-0.2.1/src/anywidget_vector/backends/lancedb/client.py +56 -0
- anywidget_vector-0.2.1/src/anywidget_vector/backends/lancedb/converter.py +71 -0
- anywidget_vector-0.2.1/src/anywidget_vector/backends/pinecone/__init__.py +21 -0
- anywidget_vector-0.2.1/src/anywidget_vector/backends/pinecone/client.js +45 -0
- anywidget_vector-0.2.1/src/anywidget_vector/backends/pinecone/converter.py +62 -0
- anywidget_vector-0.2.1/src/anywidget_vector/backends/qdrant/__init__.py +26 -0
- anywidget_vector-0.2.1/src/anywidget_vector/backends/qdrant/client.js +61 -0
- anywidget_vector-0.2.1/src/anywidget_vector/backends/qdrant/converter.py +83 -0
- anywidget_vector-0.2.1/src/anywidget_vector/backends/weaviate/__init__.py +33 -0
- anywidget_vector-0.2.1/src/anywidget_vector/backends/weaviate/client.js +50 -0
- anywidget_vector-0.2.1/src/anywidget_vector/backends/weaviate/converter.py +81 -0
- anywidget_vector-0.2.1/src/anywidget_vector/static/icons.js +14 -0
- anywidget_vector-0.2.1/src/anywidget_vector/traitlets.py +84 -0
- anywidget_vector-0.2.1/src/anywidget_vector/ui/__init__.py +206 -0
- anywidget_vector-0.2.1/src/anywidget_vector/ui/canvas.js +521 -0
- anywidget_vector-0.2.1/src/anywidget_vector/ui/constants.js +64 -0
- anywidget_vector-0.2.1/src/anywidget_vector/ui/properties.js +158 -0
- anywidget_vector-0.2.1/src/anywidget_vector/ui/settings.js +265 -0
- anywidget_vector-0.2.1/src/anywidget_vector/ui/styles.css +348 -0
- anywidget_vector-0.2.1/src/anywidget_vector/ui/toolbar.js +117 -0
- anywidget_vector-0.2.1/src/anywidget_vector/widget.py +315 -0
- {anywidget_vector-0.2.0 → anywidget_vector-0.2.1}/uv.lock +23 -24
- anywidget_vector-0.2.0/src/anywidget_vector/widget.py +0 -1261
- {anywidget_vector-0.2.0 → anywidget_vector-0.2.1}/.gitignore +0 -0
- {anywidget_vector-0.2.0 → anywidget_vector-0.2.1}/.python-version +0 -0
- {anywidget_vector-0.2.0 → anywidget_vector-0.2.1}/README.md +0 -0
- {anywidget_vector-0.2.0 → anywidget_vector-0.2.1}/src/anywidget_vector/py.typed +0 -0
- {anywidget_vector-0.2.0 → anywidget_vector-0.2.1}/tests/__init__.py +0 -0
- {anywidget_vector-0.2.0 → anywidget_vector-0.2.1}/tests/test_widget.py +0 -0
|
@@ -0,0 +1,71 @@
|
|
|
1
|
+
name: CI
|
|
2
|
+
|
|
3
|
+
on:
|
|
4
|
+
push:
|
|
5
|
+
branches: [main]
|
|
6
|
+
pull_request:
|
|
7
|
+
branches: [main]
|
|
8
|
+
|
|
9
|
+
jobs:
|
|
10
|
+
lint:
|
|
11
|
+
name: Lint
|
|
12
|
+
runs-on: ubuntu-latest
|
|
13
|
+
steps:
|
|
14
|
+
- uses: actions/checkout@v4
|
|
15
|
+
|
|
16
|
+
- name: Set up Python
|
|
17
|
+
uses: actions/setup-python@v5
|
|
18
|
+
with:
|
|
19
|
+
python-version: "3.12"
|
|
20
|
+
|
|
21
|
+
- name: Install dependencies
|
|
22
|
+
run: pip install ruff
|
|
23
|
+
|
|
24
|
+
- name: Run ruff check
|
|
25
|
+
run: ruff check .
|
|
26
|
+
|
|
27
|
+
- name: Run ruff format check
|
|
28
|
+
run: ruff format --check .
|
|
29
|
+
|
|
30
|
+
test:
|
|
31
|
+
name: Test (Python ${{ matrix.python-version }})
|
|
32
|
+
runs-on: ubuntu-latest
|
|
33
|
+
strategy:
|
|
34
|
+
matrix:
|
|
35
|
+
python-version: ["3.12", "3.13"]
|
|
36
|
+
steps:
|
|
37
|
+
- uses: actions/checkout@v4
|
|
38
|
+
|
|
39
|
+
- name: Set up Python ${{ matrix.python-version }}
|
|
40
|
+
uses: actions/setup-python@v5
|
|
41
|
+
with:
|
|
42
|
+
python-version: ${{ matrix.python-version }}
|
|
43
|
+
|
|
44
|
+
- name: Install dependencies
|
|
45
|
+
run: |
|
|
46
|
+
pip install -e ".[dev]"
|
|
47
|
+
|
|
48
|
+
- name: Run tests
|
|
49
|
+
run: pytest
|
|
50
|
+
|
|
51
|
+
build:
|
|
52
|
+
name: Build
|
|
53
|
+
runs-on: ubuntu-latest
|
|
54
|
+
steps:
|
|
55
|
+
- uses: actions/checkout@v4
|
|
56
|
+
|
|
57
|
+
- name: Set up Python
|
|
58
|
+
uses: actions/setup-python@v5
|
|
59
|
+
with:
|
|
60
|
+
python-version: "3.12"
|
|
61
|
+
|
|
62
|
+
- name: Install build dependencies
|
|
63
|
+
run: pip install build
|
|
64
|
+
|
|
65
|
+
- name: Build package
|
|
66
|
+
run: python -m build
|
|
67
|
+
|
|
68
|
+
- name: Check package
|
|
69
|
+
run: |
|
|
70
|
+
pip install twine
|
|
71
|
+
twine check dist/*
|
|
@@ -0,0 +1,77 @@
|
|
|
1
|
+
name: Publish to PyPI
|
|
2
|
+
|
|
3
|
+
on:
|
|
4
|
+
release:
|
|
5
|
+
types: [published]
|
|
6
|
+
workflow_dispatch:
|
|
7
|
+
inputs:
|
|
8
|
+
target:
|
|
9
|
+
description: 'Publish target'
|
|
10
|
+
required: true
|
|
11
|
+
default: 'testpypi'
|
|
12
|
+
type: choice
|
|
13
|
+
options:
|
|
14
|
+
- testpypi
|
|
15
|
+
- pypi
|
|
16
|
+
|
|
17
|
+
jobs:
|
|
18
|
+
build:
|
|
19
|
+
name: Build distribution
|
|
20
|
+
runs-on: ubuntu-latest
|
|
21
|
+
steps:
|
|
22
|
+
- uses: actions/checkout@v4
|
|
23
|
+
|
|
24
|
+
- name: Set up Python
|
|
25
|
+
uses: actions/setup-python@v5
|
|
26
|
+
with:
|
|
27
|
+
python-version: "3.12"
|
|
28
|
+
|
|
29
|
+
- name: Install build dependencies
|
|
30
|
+
run: pip install build
|
|
31
|
+
|
|
32
|
+
- name: Build package
|
|
33
|
+
run: python -m build
|
|
34
|
+
|
|
35
|
+
- name: Upload distribution artifacts
|
|
36
|
+
uses: actions/upload-artifact@v4
|
|
37
|
+
with:
|
|
38
|
+
name: dist
|
|
39
|
+
path: dist/
|
|
40
|
+
|
|
41
|
+
publish-testpypi:
|
|
42
|
+
name: Publish to TestPyPI
|
|
43
|
+
needs: build
|
|
44
|
+
if: github.event_name == 'workflow_dispatch' && inputs.target == 'testpypi'
|
|
45
|
+
runs-on: ubuntu-latest
|
|
46
|
+
environment: testpypi
|
|
47
|
+
permissions:
|
|
48
|
+
id-token: write
|
|
49
|
+
steps:
|
|
50
|
+
- name: Download distribution artifacts
|
|
51
|
+
uses: actions/download-artifact@v4
|
|
52
|
+
with:
|
|
53
|
+
name: dist
|
|
54
|
+
path: dist/
|
|
55
|
+
|
|
56
|
+
- name: Publish to TestPyPI
|
|
57
|
+
uses: pypa/gh-action-pypi-publish@release/v1
|
|
58
|
+
with:
|
|
59
|
+
repository-url: https://test.pypi.org/legacy/
|
|
60
|
+
|
|
61
|
+
publish-pypi:
|
|
62
|
+
name: Publish to PyPI
|
|
63
|
+
needs: build
|
|
64
|
+
if: github.event_name == 'release' || (github.event_name == 'workflow_dispatch' && inputs.target == 'pypi')
|
|
65
|
+
runs-on: ubuntu-latest
|
|
66
|
+
environment: pypi
|
|
67
|
+
permissions:
|
|
68
|
+
id-token: write
|
|
69
|
+
steps:
|
|
70
|
+
- name: Download distribution artifacts
|
|
71
|
+
uses: actions/download-artifact@v4
|
|
72
|
+
with:
|
|
73
|
+
name: dist
|
|
74
|
+
path: dist/
|
|
75
|
+
|
|
76
|
+
- name: Publish to PyPI
|
|
77
|
+
uses: pypa/gh-action-pypi-publish@release/v1
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: anywidget-vector
|
|
3
|
-
Version: 0.2.0
|
|
4
|
-
Summary: Interactive vector visualization
|
|
3
|
+
Version: 0.2.1
|
|
4
|
+
Summary: Interactive 3D vector visualization with query UI for vector databases
|
|
5
5
|
Project-URL: Homepage, https://grafeo.dev/
|
|
6
6
|
Project-URL: Repository, https://github.com/GrafeoDB/anywidget-vector
|
|
7
7
|
Author-email: "S.T. Grond" <widget@grafeo.dev>
|
|
@@ -30,7 +30,7 @@ Provides-Extra: dev
|
|
|
30
30
|
Requires-Dist: marimo>=0.19.7; extra == 'dev'
|
|
31
31
|
Requires-Dist: prek>=0.3.1; extra == 'dev'
|
|
32
32
|
Requires-Dist: pytest>=9.0.2; extra == 'dev'
|
|
33
|
-
Requires-Dist: ruff>=0.
|
|
33
|
+
Requires-Dist: ruff>=0.15.0; extra == 'dev'
|
|
34
34
|
Requires-Dist: ty>=0.0.14; extra == 'dev'
|
|
35
35
|
Provides-Extra: lancedb
|
|
36
36
|
Requires-Dist: lancedb>=0.1; extra == 'lancedb'
|
|
@@ -0,0 +1,274 @@
|
|
|
1
|
+
# /// script
|
|
2
|
+
# requires-python = ">=3.12"
|
|
3
|
+
# dependencies = [
|
|
4
|
+
# "anywidget-vector==0.2.0",
|
|
5
|
+
# "marimo",
|
|
6
|
+
# ]
|
|
7
|
+
# ///
|
|
8
|
+
import marimo
|
|
9
|
+
|
|
10
|
+
__generated_with = "0.18.4"
|
|
11
|
+
app = marimo.App(width="medium")
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
@app.cell
|
|
15
|
+
def _():
|
|
16
|
+
import marimo as mo
|
|
17
|
+
|
|
18
|
+
return (mo,)
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
@app.cell
|
|
22
|
+
def _():
|
|
23
|
+
import random
|
|
24
|
+
|
|
25
|
+
from anywidget_vector import VectorSpace
|
|
26
|
+
|
|
27
|
+
return VectorSpace, random
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
@app.cell(hide_code=True)
|
|
31
|
+
def _(mo):
|
|
32
|
+
mo.md("""
|
|
33
|
+
# anywidget-vector Demo
|
|
34
|
+
|
|
35
|
+
Interactive 3D vector visualization with distance metrics and query UI.
|
|
36
|
+
""")
|
|
37
|
+
return
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
@app.cell
|
|
41
|
+
def _(VectorSpace, random):
|
|
42
|
+
# Generate sample clustered data
|
|
43
|
+
random.seed(42)
|
|
44
|
+
|
|
45
|
+
def generate_cluster(center, n=20, spread=0.15):
|
|
46
|
+
points = []
|
|
47
|
+
for i in range(n):
|
|
48
|
+
points.append(
|
|
49
|
+
{
|
|
50
|
+
"x": center[0] + random.gauss(0, spread),
|
|
51
|
+
"y": center[1] + random.gauss(0, spread),
|
|
52
|
+
"z": center[2] + random.gauss(0, spread),
|
|
53
|
+
"cluster": center[3],
|
|
54
|
+
}
|
|
55
|
+
)
|
|
56
|
+
return points
|
|
57
|
+
|
|
58
|
+
# Create 4 clusters
|
|
59
|
+
clusters = [
|
|
60
|
+
(0.3, 0.3, 0.3, "A"),
|
|
61
|
+
(0.7, 0.3, 0.7, "B"),
|
|
62
|
+
(0.3, 0.7, 0.7, "C"),
|
|
63
|
+
(0.7, 0.7, 0.3, "D"),
|
|
64
|
+
]
|
|
65
|
+
|
|
66
|
+
points = []
|
|
67
|
+
for center in clusters:
|
|
68
|
+
points.extend(generate_cluster(center))
|
|
69
|
+
|
|
70
|
+
# Add IDs and labels
|
|
71
|
+
for i, p in enumerate(points):
|
|
72
|
+
p["id"] = f"point_{i}"
|
|
73
|
+
p["label"] = f"Point {i} ({p['cluster']})"
|
|
74
|
+
p["importance"] = random.random()
|
|
75
|
+
|
|
76
|
+
# Create widget with color by cluster
|
|
77
|
+
widget = VectorSpace(
|
|
78
|
+
points=points,
|
|
79
|
+
color_field="cluster",
|
|
80
|
+
size_field="importance",
|
|
81
|
+
size_range=[0.02, 0.06],
|
|
82
|
+
width=800,
|
|
83
|
+
height=500,
|
|
84
|
+
background="#0f0f1a",
|
|
85
|
+
)
|
|
86
|
+
widget
|
|
87
|
+
return points, widget
|
|
88
|
+
|
|
89
|
+
|
|
90
|
+
@app.cell(hide_code=True)
|
|
91
|
+
def _(mo):
|
|
92
|
+
mo.md("""
|
|
93
|
+
## Distance Features
|
|
94
|
+
|
|
95
|
+
Click on a point above, then run the cells below to explore distance metrics.
|
|
96
|
+
""")
|
|
97
|
+
return
|
|
98
|
+
|
|
99
|
+
|
|
100
|
+
@app.cell
|
|
101
|
+
def _(widget):
|
|
102
|
+
# Get the currently selected point
|
|
103
|
+
selected = widget.selected_points
|
|
104
|
+
selected_id = selected[0] if selected else "point_0"
|
|
105
|
+
f"Selected point: {selected_id}"
|
|
106
|
+
return (selected_id,)
|
|
107
|
+
|
|
108
|
+
|
|
109
|
+
@app.cell
|
|
110
|
+
def _(selected_id, widget):
|
|
111
|
+
# Find 5 nearest neighbors using Euclidean distance
|
|
112
|
+
neighbors_euclidean = widget.find_neighbors(selected_id, k=5, metric="euclidean")
|
|
113
|
+
neighbors_euclidean
|
|
114
|
+
return
|
|
115
|
+
|
|
116
|
+
|
|
117
|
+
@app.cell
|
|
118
|
+
def _(selected_id, widget):
|
|
119
|
+
# Find 5 nearest neighbors using Cosine distance
|
|
120
|
+
neighbors_cosine = widget.find_neighbors(selected_id, k=5, metric="cosine")
|
|
121
|
+
neighbors_cosine
|
|
122
|
+
return
|
|
123
|
+
|
|
124
|
+
|
|
125
|
+
@app.cell
|
|
126
|
+
def _(VectorSpace, points):
|
|
127
|
+
# Create a second widget showing neighbor connections
|
|
128
|
+
widget2 = VectorSpace(
|
|
129
|
+
points=points,
|
|
130
|
+
color_field="cluster",
|
|
131
|
+
width=800,
|
|
132
|
+
height=500,
|
|
133
|
+
background="#0f0f1a",
|
|
134
|
+
# Enable k-nearest neighbor connections
|
|
135
|
+
show_connections=True,
|
|
136
|
+
k_neighbors=3,
|
|
137
|
+
distance_metric="euclidean",
|
|
138
|
+
connection_color="#44ff88",
|
|
139
|
+
connection_opacity=0.4,
|
|
140
|
+
)
|
|
141
|
+
widget2
|
|
142
|
+
return
|
|
143
|
+
|
|
144
|
+
|
|
145
|
+
@app.cell(hide_code=True)
|
|
146
|
+
def _(mo):
|
|
147
|
+
mo.md("""
|
|
148
|
+
## Color by Distance
|
|
149
|
+
|
|
150
|
+
The widget below colors points by distance from a reference point.
|
|
151
|
+
""")
|
|
152
|
+
return
|
|
153
|
+
|
|
154
|
+
|
|
155
|
+
@app.cell
|
|
156
|
+
def _(VectorSpace, points):
|
|
157
|
+
# Create widget and color by distance from first point
|
|
158
|
+
widget3 = VectorSpace(
|
|
159
|
+
points=[dict(p) for p in points], # Copy points
|
|
160
|
+
width=800,
|
|
161
|
+
height=500,
|
|
162
|
+
background="#0f0f1a",
|
|
163
|
+
color_scale="plasma",
|
|
164
|
+
)
|
|
165
|
+
widget3.color_by_distance("point_0", metric="euclidean")
|
|
166
|
+
widget3.show_neighbors("point_0", k=5)
|
|
167
|
+
widget3
|
|
168
|
+
return
|
|
169
|
+
|
|
170
|
+
|
|
171
|
+
@app.cell(hide_code=True)
|
|
172
|
+
def _(mo):
|
|
173
|
+
mo.md("""
|
|
174
|
+
## Different Shapes
|
|
175
|
+
|
|
176
|
+
Using shapes to encode an additional dimension.
|
|
177
|
+
""")
|
|
178
|
+
return
|
|
179
|
+
|
|
180
|
+
|
|
181
|
+
@app.cell
|
|
182
|
+
def _(VectorSpace, points):
|
|
183
|
+
# Create widget with shape mapping
|
|
184
|
+
widget4 = VectorSpace(
|
|
185
|
+
points=points,
|
|
186
|
+
color_field="cluster",
|
|
187
|
+
shape_field="cluster",
|
|
188
|
+
shape_map={
|
|
189
|
+
"A": "sphere",
|
|
190
|
+
"B": "cube",
|
|
191
|
+
"C": "cone",
|
|
192
|
+
"D": "octahedron",
|
|
193
|
+
},
|
|
194
|
+
size_range=[0.04, 0.04], # Fixed size
|
|
195
|
+
width=800,
|
|
196
|
+
height=500,
|
|
197
|
+
background="#0f0f1a",
|
|
198
|
+
)
|
|
199
|
+
widget4
|
|
200
|
+
return
|
|
201
|
+
|
|
202
|
+
|
|
203
|
+
@app.cell(hide_code=True)
|
|
204
|
+
def _(mo):
|
|
205
|
+
mo.md("""
|
|
206
|
+
## Query UI
|
|
207
|
+
|
|
208
|
+
The widget below shows the query interface for vector database backends.
|
|
209
|
+
Enable `show_query_input=True` to show the toolbar.
|
|
210
|
+
|
|
211
|
+
**Supported backends:**
|
|
212
|
+
- Browser-side (REST API): Qdrant, Pinecone, Weaviate
|
|
213
|
+
- Python-side: Chroma, LanceDB, Grafeo
|
|
214
|
+
|
|
215
|
+
**Query types:**
|
|
216
|
+
- Text Search (requires embedding API)
|
|
217
|
+
- Find Similar (by vector ID)
|
|
218
|
+
- Raw Vector ([0.1, 0.2, ...])
|
|
219
|
+
- Filter (JSON filter expressions)
|
|
220
|
+
""")
|
|
221
|
+
return
|
|
222
|
+
|
|
223
|
+
|
|
224
|
+
@app.cell
|
|
225
|
+
def _(VectorSpace, points):
|
|
226
|
+
# Create widget with query UI enabled
|
|
227
|
+
widget5 = VectorSpace(
|
|
228
|
+
points=points,
|
|
229
|
+
color_field="cluster",
|
|
230
|
+
width=800,
|
|
231
|
+
height=500,
|
|
232
|
+
background="#0f0f1a",
|
|
233
|
+
# Enable query UI
|
|
234
|
+
show_query_input=True,
|
|
235
|
+
show_settings=True,
|
|
236
|
+
# Default to Qdrant backend
|
|
237
|
+
backend="qdrant",
|
|
238
|
+
)
|
|
239
|
+
widget5
|
|
240
|
+
return
|
|
241
|
+
|
|
242
|
+
|
|
243
|
+
@app.cell(hide_code=True)
|
|
244
|
+
def _(mo):
|
|
245
|
+
mo.md("""
|
|
246
|
+
### Python-side Backend Example
|
|
247
|
+
|
|
248
|
+
For Python-side backends like Chroma, configure the client directly:
|
|
249
|
+
|
|
250
|
+
```python
|
|
251
|
+
import chromadb
|
|
252
|
+
|
|
253
|
+
client = chromadb.Client()
|
|
254
|
+
collection = client.get_or_create_collection("my_vectors")
|
|
255
|
+
|
|
256
|
+
widget = VectorSpace(
|
|
257
|
+
show_query_input=True,
|
|
258
|
+
show_settings=True,
|
|
259
|
+
)
|
|
260
|
+
widget.set_backend("chroma", collection)
|
|
261
|
+
|
|
262
|
+
# Optional: set custom embedding function for text search
|
|
263
|
+
def my_embed(text):
|
|
264
|
+
# Your embedding logic here
|
|
265
|
+
return [0.1, 0.2, 0.3, ...]
|
|
266
|
+
|
|
267
|
+
widget.set_embedding(my_embed)
|
|
268
|
+
```
|
|
269
|
+
""")
|
|
270
|
+
return
|
|
271
|
+
|
|
272
|
+
|
|
273
|
+
if __name__ == "__main__":
|
|
274
|
+
app.run()
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
[project]
|
|
2
2
|
name = "anywidget-vector"
|
|
3
|
-
version = "0.2.0"
|
|
4
|
-
description = "Interactive vector visualization
|
|
3
|
+
version = "0.2.1"
|
|
4
|
+
description = "Interactive 3D vector visualization with query UI for vector databases"
|
|
5
5
|
readme = "README.md"
|
|
6
6
|
license = { text = "Apache-2.0" }
|
|
7
7
|
requires-python = ">=3.12"
|
|
@@ -28,7 +28,7 @@ dependencies = [
|
|
|
28
28
|
dev = [
|
|
29
29
|
"prek>=0.3.1",
|
|
30
30
|
"pytest>=9.0.2",
|
|
31
|
-
"ruff>=0.
|
|
31
|
+
"ruff>=0.15.0",
|
|
32
32
|
"ty>=0.0.14",
|
|
33
33
|
"marimo>=0.19.7",
|
|
34
34
|
]
|
|
@@ -0,0 +1,103 @@
|
|
|
1
|
+
"""Vector database backend adapters.
|
|
2
|
+
|
|
3
|
+
Each backend has its own query format matching its native API:
|
|
4
|
+
- Qdrant: JSON with vector, filter, limit
|
|
5
|
+
- Pinecone: JSON with vector, filter, topK
|
|
6
|
+
- Weaviate: GraphQL
|
|
7
|
+
- Chroma: Python dict (query_embeddings, where, n_results)
|
|
8
|
+
- LanceDB: SQL-like expressions
|
|
9
|
+
- Grafeo: Grafeo query format
|
|
10
|
+
"""
|
|
11
|
+
|
|
12
|
+
from __future__ import annotations
|
|
13
|
+
|
|
14
|
+
from typing import Any, Protocol, runtime_checkable
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
@runtime_checkable
class VectorBackend(Protocol):
    """Structural interface that a vector database backend is expected to satisfy.

    The protocol is decorated with ``runtime_checkable``, so it can be used
    with ``isinstance``. Such a check only confirms that the members below
    are present on the object; it does not validate their types or the
    signature of ``execute``.
    """

    # Backend name.
    name: str
    # Where the backend runs: "browser" or "python".
    side: str
    # Query language the backend accepts, e.g. "json", "graphql", "sql", "python".
    query_language: str

    def execute(self, query: str, config: dict[str, Any]) -> list[dict[str, Any]]:
        """Run ``query`` with the given ``config`` and return the resulting points."""
        ...
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
# Registry of the supported backends, keyed by backend identifier.
def _backend_entry(
    name: str,
    side: str,
    query_language: str,
    placeholder: str,
    help_text: str,
) -> dict[str, Any]:
    """Assemble one registry record from its five metadata fields."""
    return {
        "name": name,
        "side": side,
        "query_language": query_language,
        "placeholder": placeholder,
        "help": help_text,
    }


BACKENDS: dict[str, dict[str, Any]] = {
    "qdrant": _backend_entry(
        name="Qdrant",
        side="browser",
        query_language="json",
        placeholder='{"vector": [...], "limit": 10}',
        help_text="JSON: vector, filter, limit, recommend, ids",
    ),
    "pinecone": _backend_entry(
        name="Pinecone",
        side="browser",
        query_language="json",
        placeholder='{"vector": [...], "topK": 10}',
        help_text="JSON: vector, filter, topK, namespace",
    ),
    "weaviate": _backend_entry(
        name="Weaviate",
        side="browser",
        query_language="graphql",
        placeholder="{ Get { Class(limit: 10) { ... } } }",
        help_text="GraphQL with nearVector, nearText, where",
    ),
    "chroma": _backend_entry(
        name="Chroma",
        side="python",
        query_language="dict",
        placeholder='{"query_embeddings": [...], "n_results": 10}',
        help_text="Dict: query_embeddings, where, n_results",
    ),
    "lancedb": _backend_entry(
        name="LanceDB",
        side="python",
        query_language="sql",
        placeholder="category = 'tech' AND year > 2020",
        help_text="SQL WHERE clause for filtering",
    ),
    "grafeo": _backend_entry(
        name="Grafeo",
        side="python",
        query_language="grafeo",
        placeholder="MATCH (n:Vector) RETURN n LIMIT 10",
        help_text="Grafeo query language",
    ),
}


def _backend_field(name: str, field: str) -> Any:
    """Return ``field`` from the registry record for ``name``.

    Falls back to the empty string when the backend is not registered, when
    its record is empty, or when the record has no such field.
    """
    record = BACKENDS.get(name)
    if not record:
        return ""
    return record.get(field, "")


def get_backend_info(name: str) -> dict[str, Any] | None:
    """Return the registry record for ``name``, or ``None`` if it is not registered."""
    return BACKENDS.get(name)


def is_browser_backend(name: str) -> bool:
    """Return ``True`` when ``name`` is registered with side ``"browser"``."""
    return _backend_field(name, "side") == "browser"


def is_python_backend(name: str) -> bool:
    """Return ``True`` when ``name`` is registered with side ``"python"``."""
    return _backend_field(name, "side") == "python"


def get_query_placeholder(name: str) -> str:
    """Return the example query for ``name``, or ``""`` for an unknown backend."""
    return _backend_field(name, "placeholder")


def get_query_help(name: str) -> str:
    """Return the query help text for ``name``, or ``""`` for an unknown backend."""
    return _backend_field(name, "help")
|
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
"""Chroma backend adapter.
|
|
2
|
+
|
|
3
|
+
Query Format (dict):
|
|
4
|
+
# Query by embeddings
|
|
5
|
+
{"query_embeddings": [[0.1, 0.2, ...]], "n_results": 10}
|
|
6
|
+
|
|
7
|
+
# With where filter
|
|
8
|
+
{
|
|
9
|
+
"query_embeddings": [[0.1, 0.2, ...]],
|
|
10
|
+
"where": {"category": "tech"},
|
|
11
|
+
"n_results": 10
|
|
12
|
+
}
|
|
13
|
+
|
|
14
|
+
# Get by IDs
|
|
15
|
+
{"ids": ["id1", "id2"]}
|
|
16
|
+
|
|
17
|
+
# Get with filter only
|
|
18
|
+
{"where": {"category": "tech"}, "limit": 100}
|
|
19
|
+
|
|
20
|
+
Where operators: $eq, $ne, $gt, $gte, $lt, $lte, $in, $nin
|
|
21
|
+
Combined: $and, $or
|
|
22
|
+
"""
|
|
23
|
+
|
|
24
|
+
from anywidget_vector.backends.chroma.client import execute_query
|
|
25
|
+
from anywidget_vector.backends.chroma.converter import build_where, to_points
|
|
26
|
+
|
|
27
|
+
__all__ = ["to_points", "build_where", "execute_query"]
|
|
@@ -0,0 +1,60 @@
|
|
|
1
|
+
"""Chroma Python client."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import json
|
|
6
|
+
from typing import Any
|
|
7
|
+
|
|
8
|
+
from anywidget_vector.backends.chroma.converter import to_points
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
def execute_query(
    collection: Any,
    query: str | dict[str, Any],
) -> list[dict[str, Any]]:
    """Execute a query against a Chroma collection and return points.

    The query is dispatched on the keys it contains, checked in this order:

    1. ``"ids"``: ``collection.get`` for exactly those IDs.
    2. ``"query_embeddings"``: ``collection.query`` with optional
       ``"n_results"`` (default 10), ``"where"`` and ``"where_document"``.
    3. ``"where"``: ``collection.get`` with that filter and optional
       ``"limit"`` (default 100).
    4. otherwise: ``collection.get`` with optional ``"limit"`` (default 100).

    Args:
        collection: Chroma collection object; only its ``get`` and ``query``
            methods are used.
        query: Query dict, or a JSON string that decodes to a JSON object.

    Returns:
        The backend response converted by ``to_points``.

    Raises:
        ValueError: If ``query`` is a string that is not valid JSON
            (``json.JSONDecodeError``) or that decodes to something other
            than a JSON object.
    """
    if isinstance(query, str):
        decoded = json.loads(query)
        # Fail fast with a clear message: a JSON list/string/number/null would
        # otherwise surface later as an unrelated AttributeError or TypeError.
        if not isinstance(decoded, dict):
            raise ValueError(
                f"Chroma query must be a JSON object, got {type(decoded).__name__}"
            )
        query = decoded

    # Fields requested from every call; similarity queries also ask for distances.
    get_fields = ["embeddings", "metadatas", "documents"]

    if "ids" in query:
        # Get by IDs
        response = collection.get(ids=query["ids"], include=get_fields)
    elif "query_embeddings" in query:
        # Query by embeddings
        response = collection.query(
            query_embeddings=query["query_embeddings"],
            n_results=query.get("n_results", 10),
            where=query.get("where"),
            where_document=query.get("where_document"),
            include=[*get_fields, "distances"],
        )
    elif "where" in query:
        # Get with filter
        response = collection.get(
            where=query["where"],
            limit=query.get("limit", 100),
            include=get_fields,
        )
    else:
        # Get all (with limit)
        response = collection.get(
            limit=query.get("limit", 100),
            include=get_fields,
        )

    return to_points(response)
|