anywidget-vector 0.1.0__py3-none-any.whl → 0.2.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- anywidget_vector/__init__.py +1 -1
- anywidget_vector/backends/__init__.py +103 -0
- anywidget_vector/backends/chroma/__init__.py +27 -0
- anywidget_vector/backends/chroma/client.py +60 -0
- anywidget_vector/backends/chroma/converter.py +86 -0
- anywidget_vector/backends/grafeo/__init__.py +20 -0
- anywidget_vector/backends/grafeo/client.py +33 -0
- anywidget_vector/backends/grafeo/converter.py +46 -0
- anywidget_vector/backends/lancedb/__init__.py +22 -0
- anywidget_vector/backends/lancedb/client.py +56 -0
- anywidget_vector/backends/lancedb/converter.py +71 -0
- anywidget_vector/backends/pinecone/__init__.py +21 -0
- anywidget_vector/backends/pinecone/client.js +45 -0
- anywidget_vector/backends/pinecone/converter.py +62 -0
- anywidget_vector/backends/qdrant/__init__.py +26 -0
- anywidget_vector/backends/qdrant/client.js +61 -0
- anywidget_vector/backends/qdrant/converter.py +83 -0
- anywidget_vector/backends/weaviate/__init__.py +33 -0
- anywidget_vector/backends/weaviate/client.js +50 -0
- anywidget_vector/backends/weaviate/converter.py +81 -0
- anywidget_vector/static/icons.js +14 -0
- anywidget_vector/traitlets.py +84 -0
- anywidget_vector/ui/__init__.py +206 -0
- anywidget_vector/ui/canvas.js +521 -0
- anywidget_vector/ui/constants.js +64 -0
- anywidget_vector/ui/properties.js +158 -0
- anywidget_vector/ui/settings.js +265 -0
- anywidget_vector/ui/styles.css +348 -0
- anywidget_vector/ui/toolbar.js +117 -0
- anywidget_vector/widget.py +187 -850
- {anywidget_vector-0.1.0.dist-info → anywidget_vector-0.2.1.dist-info}/METADATA +70 -3
- anywidget_vector-0.2.1.dist-info/RECORD +34 -0
- anywidget_vector-0.1.0.dist-info/RECORD +0 -6
- {anywidget_vector-0.1.0.dist-info → anywidget_vector-0.2.1.dist-info}/WHEEL +0 -0
anywidget_vector/__init__.py
CHANGED
|
@@ -0,0 +1,103 @@
|
|
|
1
|
+
"""Vector database backend adapters.
|
|
2
|
+
|
|
3
|
+
Each backend has its own query format matching its native API:
|
|
4
|
+
- Qdrant: JSON with vector, filter, limit
|
|
5
|
+
- Pinecone: JSON with vector, filter, topK
|
|
6
|
+
- Weaviate: GraphQL
|
|
7
|
+
- Chroma: Python dict (query_embeddings, where, n_results)
|
|
8
|
+
- LanceDB: SQL-like expressions
|
|
9
|
+
- Grafeo: Grafeo query format
|
|
10
|
+
"""
|
|
11
|
+
|
|
12
|
+
from __future__ import annotations
|
|
13
|
+
|
|
14
|
+
from typing import Any, Protocol, runtime_checkable
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
@runtime_checkable
class VectorBackend(Protocol):
    """Protocol for vector database backends.

    Structural interface: any object exposing the attributes and the
    ``execute`` method below satisfies it. ``runtime_checkable`` allows the
    protocol to be used with ``isinstance``.
    """

    # Backend name.
    name: str
    side: str  # "browser" or "python"
    # NOTE(review): the BACKENDS registry in this module uses the values
    # "json", "graphql", "dict", "sql" and "grafeo" for this field.
    query_language: str  # e.g., "json", "graphql", "sql", "python"

    def execute(self, query: str, config: dict[str, Any]) -> list[dict[str, Any]]:
        """Execute a query and return points.

        Args:
            query: Query text in the backend's query language.
            config: Backend configuration mapping.

        Returns:
            A list of point dictionaries.
        """
        ...
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
# Backend registry with metadata
#
# Keyed by backend identifier. Every entry carries the same five keys:
#   name           - display name
#   side           - "browser" or "python" (read by is_browser_backend /
#                    is_python_backend)
#   query_language - identifier of the query syntax the backend accepts
#   placeholder    - example query (returned by get_query_placeholder)
#   help           - one-line summary of the query fields (returned by
#                    get_query_help)
BACKENDS: dict[str, dict[str, Any]] = {
    "qdrant": {
        "name": "Qdrant",
        "side": "browser",
        "query_language": "json",
        "placeholder": '{"vector": [...], "limit": 10}',
        "help": "JSON: vector, filter, limit, recommend, ids",
    },
    "pinecone": {
        "name": "Pinecone",
        "side": "browser",
        "query_language": "json",
        "placeholder": '{"vector": [...], "topK": 10}',
        "help": "JSON: vector, filter, topK, namespace",
    },
    "weaviate": {
        "name": "Weaviate",
        "side": "browser",
        "query_language": "graphql",
        "placeholder": "{ Get { Class(limit: 10) { ... } } }",
        "help": "GraphQL with nearVector, nearText, where",
    },
    "chroma": {
        "name": "Chroma",
        "side": "python",
        "query_language": "dict",
        "placeholder": '{"query_embeddings": [...], "n_results": 10}',
        "help": "Dict: query_embeddings, where, n_results",
    },
    "lancedb": {
        "name": "LanceDB",
        "side": "python",
        "query_language": "sql",
        "placeholder": "category = 'tech' AND year > 2020",
        "help": "SQL WHERE clause for filtering",
    },
    "grafeo": {
        "name": "Grafeo",
        "side": "python",
        "query_language": "grafeo",
        "placeholder": "MATCH (n:Vector) RETURN n LIMIT 10",
        "help": "Grafeo query language",
    },
}
|
|
75
|
+
|
|
76
|
+
|
|
77
|
+
def get_backend_info(name: str) -> dict[str, Any] | None:
    """Return the registry entry for ``name``, or ``None`` if it is not registered."""
    if name in BACKENDS:
        return BACKENDS[name]
    return None
|
|
80
|
+
|
|
81
|
+
|
|
82
|
+
def is_browser_backend(name: str) -> bool:
    """Return True when ``name`` is registered with side ``"browser"``."""
    entry = BACKENDS.get(name)
    if entry is None:
        # Unknown backends are neither browser- nor python-side.
        return False
    return entry.get("side") == "browser"
|
|
86
|
+
|
|
87
|
+
|
|
88
|
+
def is_python_backend(name: str) -> bool:
    """Return True when ``name`` is registered with side ``"python"``."""
    entry = BACKENDS.get(name)
    if entry is None:
        # Unknown backends are neither browser- nor python-side.
        return False
    return entry.get("side") == "python"
|
|
92
|
+
|
|
93
|
+
|
|
94
|
+
def get_query_placeholder(name: str) -> str:
    """Return the example query for ``name``; empty string if unknown."""
    entry = BACKENDS.get(name)
    if not entry:
        return ""
    return entry.get("placeholder", "")
|
|
98
|
+
|
|
99
|
+
|
|
100
|
+
def get_query_help(name: str) -> str:
    """Return the query help text for ``name``; empty string if unknown."""
    entry = BACKENDS.get(name)
    if not entry:
        return ""
    return entry.get("help", "")
|
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
"""Chroma backend adapter.
|
|
2
|
+
|
|
3
|
+
Query Format (dict):
|
|
4
|
+
# Query by embeddings
|
|
5
|
+
{"query_embeddings": [[0.1, 0.2, ...]], "n_results": 10}
|
|
6
|
+
|
|
7
|
+
# With where filter
|
|
8
|
+
{
|
|
9
|
+
"query_embeddings": [[0.1, 0.2, ...]],
|
|
10
|
+
"where": {"category": "tech"},
|
|
11
|
+
"n_results": 10
|
|
12
|
+
}
|
|
13
|
+
|
|
14
|
+
# Get by IDs
|
|
15
|
+
{"ids": ["id1", "id2"]}
|
|
16
|
+
|
|
17
|
+
# Get with filter only
|
|
18
|
+
{"where": {"category": "tech"}, "limit": 100}
|
|
19
|
+
|
|
20
|
+
Where operators: $eq, $ne, $gt, $gte, $lt, $lte, $in, $nin
|
|
21
|
+
Combined: $and, $or
|
|
22
|
+
"""
|
|
23
|
+
|
|
24
|
+
from anywidget_vector.backends.chroma.client import execute_query
|
|
25
|
+
from anywidget_vector.backends.chroma.converter import build_where, to_points
|
|
26
|
+
|
|
27
|
+
__all__ = ["to_points", "build_where", "execute_query"]
|
|
@@ -0,0 +1,60 @@
|
|
|
1
|
+
"""Chroma Python client."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import json
|
|
6
|
+
from typing import Any
|
|
7
|
+
|
|
8
|
+
from anywidget_vector.backends.chroma.converter import to_points
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
def execute_query(
    collection: Any,
    query: str | dict[str, Any],
) -> list[dict[str, Any]]:
    """Run a query against a Chroma collection and convert the result to points.

    The query is dispatched on the first matching key, in this order:
    ``ids`` (get by id), ``query_embeddings`` (similarity query),
    ``where`` (filtered get), otherwise a plain get limited by ``limit``.

    Args:
        collection: Chroma collection object
        query: Query dict or JSON string

    Returns:
        List of points
    """
    spec = json.loads(query) if isinstance(query, str) else query
    stored_fields = ["embeddings", "metadatas", "documents"]

    if "ids" in spec:
        found = collection.get(ids=spec["ids"], include=stored_fields)
    elif "query_embeddings" in spec:
        found = collection.query(
            query_embeddings=spec["query_embeddings"],
            n_results=spec.get("n_results", 10),
            where=spec.get("where"),
            where_document=spec.get("where_document"),
            include=stored_fields + ["distances"],
        )
    elif "where" in spec:
        found = collection.get(
            where=spec["where"],
            limit=spec.get("limit", 100),
            include=stored_fields,
        )
    else:
        found = collection.get(
            limit=spec.get("limit", 100),
            include=stored_fields,
        )

    return to_points(found)
|
|
@@ -0,0 +1,86 @@
|
|
|
1
|
+
"""Chroma result conversion and filter building."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from typing import Any
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
def to_points(response: dict[str, Any]) -> list[dict[str, Any]]:
    """Convert a Chroma ``get`` or ``query`` response to points format.

    A response containing a ``"distances"`` key is treated as a query
    result, whose fields are nested one level (one list per query
    embedding); only the first query's results are converted. Otherwise the
    fields are read as flat lists (``get`` result).

    Fields that are missing, ``None`` or empty are skipped instead of
    raising, and no truthiness test is applied to the embedding containers,
    so array-like embeddings are accepted as well as plain lists.

    Args:
        response: Chroma response mapping.

    Returns:
        One dict per id with ``id`` and, when available, ``score``
        (``1 / (1 + distance)``), ``x``/``y``/``z`` (first three embedding
        components), ``vector``, ``document`` and all metadata keys.
    """
    is_query = "distances" in response

    def column(key: str) -> Any:
        """Return the per-row sequence for ``key``, or [] when unavailable."""
        values = response.get(key)
        if values is None:
            return []
        if not is_query:
            return values
        # Query results: unwrap the batch belonging to the first query.
        if len(values) == 0 or values[0] is None:
            return []
        return values[0]

    ids = column("ids")
    embeddings = column("embeddings")
    metadatas = column("metadatas")
    distances = column("distances") if is_query else []
    documents = column("documents")

    points = []
    for i, id_ in enumerate(ids):
        point: dict[str, Any] = {"id": str(id_)}

        # Score from distance (invert since distance = dissimilarity)
        if i < len(distances) and distances[i] is not None:
            point["score"] = 1 / (1 + float(distances[i]))

        # Embeddings -> coordinates
        vec = embeddings[i] if i < len(embeddings) else None
        if vec is not None and len(vec) > 0:
            if hasattr(vec, "tolist"):
                # Array-like embedding: convert to a plain Python list.
                vec = vec.tolist()
            point["x"] = float(vec[0])
            point["y"] = float(vec[1]) if len(vec) > 1 else 0
            point["z"] = float(vec[2]) if len(vec) > 2 else 0
            point["vector"] = vec

        # Document content
        document = documents[i] if i < len(documents) else None
        if document:
            point["document"] = document

        # Metadata (applied last, so metadata keys win on a name clash)
        metadata = metadatas[i] if i < len(metadatas) else None
        if metadata:
            point.update(metadata)

        points.append(point)

    return points
|
|
46
|
+
|
|
47
|
+
|
|
48
|
+
def build_where(conditions: list[tuple[str, str, Any]]) -> dict[str, Any]:
    """Translate ``(field, operator, value)`` triples into a Chroma where filter.

    Chroma uses MongoDB-style operators: $eq, $ne, $gt, $gte, $lt, $lte, $in, $nin

    Equality is emitted in shorthand form (``{field: value}``); operators
    that are not recognised are treated as equality. A scalar value given
    with the ``:`` operator is wrapped in a list. No conditions produce an
    empty filter, one condition produces a bare clause, and several are
    combined under ``$and``.
    """
    operators = {
        "=": "$eq",
        "!=": "$ne",
        ">": "$gt",
        ">=": "$gte",
        "<": "$lt",
        "<=": "$lte",
        ":": "$in",
    }

    clauses = []
    for name, symbol, operand in conditions:
        mongo_op = operators.get(symbol, "$eq")
        if mongo_op == "$eq":
            clauses.append({name: operand})
            continue
        if mongo_op == "$in" and not isinstance(operand, list):
            operand = [operand]
        clauses.append({name: {mongo_op: operand}})

    if not clauses:
        return {}
    if len(clauses) == 1:
        return clauses[0]
    return {"$and": clauses}
|
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
"""Grafeo backend adapter.
|
|
2
|
+
|
|
3
|
+
Query Format (Grafeo Query Language):
|
|
4
|
+
# Get vectors with limit
|
|
5
|
+
MATCH (n:Vector) RETURN n LIMIT 10
|
|
6
|
+
|
|
7
|
+
# Filter by property
|
|
8
|
+
MATCH (n:Vector {category: "tech"}) RETURN n LIMIT 10
|
|
9
|
+
|
|
10
|
+
# Vector similarity (placeholder - depends on Grafeo API)
|
|
11
|
+
MATCH (n:Vector) WHERE similarity(n.embedding, $vector) > 0.8 RETURN n
|
|
12
|
+
|
|
13
|
+
# Connected nodes
|
|
14
|
+
MATCH (n:Vector)-[r]->(m) RETURN n, r, m LIMIT 10
|
|
15
|
+
"""
|
|
16
|
+
|
|
17
|
+
from anywidget_vector.backends.grafeo.client import execute_query
|
|
18
|
+
from anywidget_vector.backends.grafeo.converter import to_points
|
|
19
|
+
|
|
20
|
+
__all__ = ["to_points", "execute_query"]
|
|
@@ -0,0 +1,33 @@
|
|
|
1
|
+
"""Grafeo Python client."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from typing import Any
|
|
6
|
+
|
|
7
|
+
from anywidget_vector.backends.grafeo.converter import to_points
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
def execute_query(
    client: Any,
    query: str,
) -> list[dict[str, Any]]:
    """Run a Grafeo query and convert the results to points.

    The client is duck-typed: the first of ``query``, ``run`` or
    ``execute`` that the client exposes is called with the query string.

    Args:
        client: Grafeo client/session object
        query: Grafeo query string

    Returns:
        List of points

    Raises:
        ValueError: If the client exposes none of the supported methods.
    """
    # Execute the query - actual API depends on Grafeo implementation
    runner_name = next(
        (candidate for candidate in ("query", "run", "execute") if hasattr(client, candidate)),
        None,
    )
    if runner_name is None:
        raise ValueError("Grafeo client must have query(), run(), or execute() method")

    return to_points(getattr(client, runner_name)(query))
|
|
@@ -0,0 +1,46 @@
|
|
|
1
|
+
"""Grafeo result conversion."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from typing import Any
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
def to_points(results: Any) -> list[dict[str, Any]]:
    """Convert Grafeo results to points format.

    Accepted result containers, checked in this order: ``None`` (treated as
    no results), an object with ``to_dict`` (called as
    ``to_dict("records")``), an object with ``records()``, a list, or any
    other iterable.

    Dict items become points: ``id`` (falling back to ``point_<index>``),
    coordinates taken from the first non-empty ``vector``/``embedding``
    field or, failing that, from the item's ``x``/``y``/``z`` keys, plus all
    remaining keys copied through. Non-dict items are wrapped as
    ``{"id", "data", "x", "y", "z"}`` at the origin.

    Args:
        results: Raw query results.

    Returns:
        List of point dicts.
    """
    if results is None:
        return []

    # Handle different result types
    if hasattr(results, "to_dict"):
        results = results.to_dict("records")
    elif hasattr(results, "records"):
        results = [dict(r) for r in results.records()]
    elif not isinstance(results, list):
        results = list(results)

    points = []
    for i, item in enumerate(results):
        if not isinstance(item, dict):
            # Raw value
            points.append({"id": f"point_{i}", "data": item, "x": 0, "y": 0, "z": 0})
            continue

        point: dict[str, Any] = {"id": str(item.get("id", f"point_{i}"))}

        # Look for vector/embedding. Compare against None and test the
        # materialised list, never the raw value: array-like vectors do not
        # support a plain truthiness test.
        vec: list[Any] = []
        for key in ("vector", "embedding"):
            candidate = item.get(key)
            if candidate is None:
                continue
            if hasattr(candidate, "__iter__"):
                vec = list(candidate)
            elif candidate:
                vec = [candidate]
            if vec:
                break

        if vec:
            point["x"] = float(vec[0])
            point["y"] = float(vec[1]) if len(vec) > 1 else 0
            point["z"] = float(vec[2]) if len(vec) > 2 else 0
            point["vector"] = vec
        else:
            # A coordinate that is missing or None falls back to 0.
            for axis in ("x", "y", "z"):
                point[axis] = float(item.get(axis) or 0)

        # Add all other fields
        for k, v in item.items():
            if k not in ("id", "vector", "embedding", "x", "y", "z"):
                point[k] = v

        points.append(point)

    return points
|
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
"""LanceDB backend adapter.
|
|
2
|
+
|
|
3
|
+
Query Format:
|
|
4
|
+
# Vector search (pass vector separately)
|
|
5
|
+
{"vector": [0.1, 0.2, ...], "limit": 10}
|
|
6
|
+
|
|
7
|
+
# With SQL filter
|
|
8
|
+
{"vector": [...], "where": "category = 'tech' AND year > 2020", "limit": 10}
|
|
9
|
+
|
|
10
|
+
# Filter only (no vector)
|
|
11
|
+
{"where": "category = 'tech'", "limit": 100}
|
|
12
|
+
|
|
13
|
+
# Full-text search
|
|
14
|
+
{"fts": "search query", "limit": 10}
|
|
15
|
+
|
|
16
|
+
SQL WHERE supports: =, !=, <, >, <=, >=, AND, OR, IN, LIKE, IS NULL, IS NOT NULL
|
|
17
|
+
"""
|
|
18
|
+
|
|
19
|
+
from anywidget_vector.backends.lancedb.client import execute_query
|
|
20
|
+
from anywidget_vector.backends.lancedb.converter import to_points
|
|
21
|
+
|
|
22
|
+
__all__ = ["to_points", "execute_query"]
|
|
@@ -0,0 +1,56 @@
|
|
|
1
|
+
"""LanceDB Python client."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import json
|
|
6
|
+
from typing import Any
|
|
7
|
+
|
|
8
|
+
from anywidget_vector.backends.lancedb.converter import to_points
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
def execute_query(
    table: Any,
    query: str | dict[str, Any],
) -> list[dict[str, Any]]:
    """Execute query against LanceDB table.

    Dispatch order: ``vector`` (similarity search), ``fts`` (full-text
    search), ``where`` alone (filtered scan), otherwise the first ``limit``
    rows of the table. A ``where`` clause, when given, is applied in every
    search mode, including the filter-only one.

    Args:
        table: LanceDB table object
        query: Query dict or JSON string

    Returns:
        List of points
    """
    if isinstance(query, str):
        query = json.loads(query)

    limit = query.get("limit", 100)

    if "vector" in query:
        # Vector search
        search = table.search(query["vector"])
    elif "fts" in query:
        # Full-text search
        search = table.search(query["fts"], query_type="fts")
    elif query.get("where"):
        # Filter only: a search without a query applies just the
        # where/limit clauses, so the filter is evaluated by LanceDB
        # instead of being dropped.
        search = table.search()
    else:
        # Get all
        results = table.to_pandas().head(limit).to_dict("records")
        return to_points(results)

    if "where" in query:
        search = search.where(query["where"])
    results = search.limit(limit).to_list()
    return to_points(results)
|
|
@@ -0,0 +1,71 @@
|
|
|
1
|
+
"""LanceDB result conversion."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from typing import Any
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
def to_points(results: list[dict[str, Any]]) -> list[dict[str, Any]]:
    """Turn LanceDB result rows into point dicts.

    Each row yields ``id`` (or ``point_<index>``), an optional ``score``
    derived from ``_distance`` as ``1 / (1 + distance)``, coordinates from
    the first non-None vector column, and every remaining column copied
    through unchanged.
    """
    vector_columns = ("vector", "embedding", "embeddings", "_vec")
    reserved = {"id", "_distance", *vector_columns}

    converted = []
    for index, record in enumerate(results):
        entry: dict[str, Any] = {"id": str(record.get("id", f"point_{index}"))}

        # Distance -> score
        if "_distance" in record:
            entry["score"] = 1 / (1 + record["_distance"])

        # First vector column holding a value wins
        raw = next(
            (record[col] for col in vector_columns if col in record and record[col] is not None),
            None,
        )
        if raw is not None:
            components = list(raw) if hasattr(raw, "__iter__") else [raw]
            size = len(components)
            entry["x"] = float(components[0]) if size > 0 else 0
            entry["y"] = float(components[1]) if size > 1 else 0
            entry["z"] = float(components[2]) if size > 2 else 0
            entry["vector"] = components

        # Remaining columns are passed through as-is
        entry.update({col: val for col, val in record.items() if col not in reserved})

        converted.append(entry)

    return converted
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
def build_where(conditions: list[tuple[str, str, Any]]) -> str:
    """Build SQL WHERE clause from conditions.

    Each condition is a ``(field, operator, value)`` triple:

    - ``~`` becomes ``field LIKE '%value%'`` (partial match)
    - ``:`` becomes ``field IN (...)``; a scalar is treated as a
      one-element list
    - ``=`` / ``!=`` with a ``None`` value become ``IS NULL`` /
      ``IS NOT NULL``
    - any other operator is emitted verbatim as ``field op value``

    String values are single-quoted with embedded single quotes doubled, so
    a value such as ``O'Brien`` cannot terminate the literal early. Field
    names and operators are inserted as given and must come from trusted
    code. ``%`` and ``_`` inside a ``~`` value keep their LIKE wildcard
    meaning.

    Returns SQL string compatible with LanceDB; conditions are joined with
    ``AND``. Returns an empty string when there are no conditions.
    """
    if not conditions:
        return ""

    def quote(text: str) -> str:
        """Escape single quotes for use inside a SQL string literal."""
        return text.replace("'", "''")

    def literal(value: Any) -> str:
        """Render a Python value as a SQL literal."""
        if isinstance(value, str):
            return f"'{quote(value)}'"
        return str(value)

    parts = []
    for field, op, value in conditions:
        if op == "~":
            # LIKE for partial match
            parts.append(f"{field} LIKE '%{quote(str(value))}%'")
        elif op == ":":
            # IN for array contains
            items = value if isinstance(value, list) else [value]
            values = ", ".join(literal(v) for v in items)
            parts.append(f"{field} IN ({values})")
        elif value is None and op in ("=", "!="):
            # SQL compares against NULL with IS / IS NOT, not = / !=
            parts.append(f"{field} IS NULL" if op == "=" else f"{field} IS NOT NULL")
        else:
            # Standard operators
            parts.append(f"{field} {op} {literal(value)}")

    return " AND ".join(parts)
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
"""Pinecone backend adapter.
|
|
2
|
+
|
|
3
|
+
Query Format (JSON):
|
|
4
|
+
# Similarity search
|
|
5
|
+
{"vector": [0.1, 0.2, ...], "topK": 10}
|
|
6
|
+
|
|
7
|
+
# With filter
|
|
8
|
+
{"vector": [...], "filter": {"category": {"$eq": "tech"}}, "topK": 10}
|
|
9
|
+
|
|
10
|
+
# With namespace
|
|
11
|
+
{"vector": [...], "topK": 10, "namespace": "my-namespace"}
|
|
12
|
+
|
|
13
|
+
# Fetch by IDs
|
|
14
|
+
{"ids": ["id1", "id2"]}
|
|
15
|
+
|
|
16
|
+
Filter operators: $eq, $ne, $gt, $gte, $lt, $lte, $in, $nin
|
|
17
|
+
"""
|
|
18
|
+
|
|
19
|
+
from anywidget_vector.backends.pinecone.converter import build_filter, to_points
|
|
20
|
+
|
|
21
|
+
__all__ = ["to_points", "build_filter"]
|
|
@@ -0,0 +1,45 @@
|
|
|
1
|
+
// Pinecone browser-side client
|
|
2
|
+
|
|
3
|
+
/**
 * Run a query against a Pinecone index over HTTP.
 *
 * A query with `ids` fetches those vectors (GET /vectors/fetch); any other
 * query is posted to /query with metadata and values included.
 *
 * @param {string|object} query - Query object or its JSON text.
 * @param {{url: string, apiKey: string, namespace?: string}} config -
 *   Index URL, API key and default namespace. A `namespace` in the query
 *   takes precedence over the configured one.
 * @returns {Promise<object>} Parsed JSON response body.
 * @throws {Error} When the HTTP response status is not OK.
 */
export async function executeQuery(query, config) {
  const { url, apiKey, namespace } = config;
  const headers = { "Content-Type": "application/json", "Api-Key": apiKey };

  const parsed = typeof query === "string" ? JSON.parse(query) : query;
  const effectiveNamespace = parsed.namespace || namespace;

  if (parsed.ids) {
    // Fetch by IDs. IDs are arbitrary strings, so each one is encoded
    // before being placed in the query string.
    const params = parsed.ids.map(id => `ids=${encodeURIComponent(id)}`);
    if (effectiveNamespace) {
      params.push(`namespace=${encodeURIComponent(effectiveNamespace)}`);
    }
    const resp = await fetch(`${url}/vectors/fetch?${params.join("&")}`, {
      method: "GET",
      headers,
    });
    if (!resp.ok) throw new Error(`Pinecone error: ${await resp.text()}`);
    return await resp.json();
  }

  // Query
  const body = {
    vector: parsed.vector,
    topK: parsed.topK || parsed.limit || 10,
    includeMetadata: true,
    includeValues: true,
    filter: parsed.filter,
    namespace: effectiveNamespace,
  };

  const resp = await fetch(`${url}/query`, { method: "POST", headers, body: JSON.stringify(body) });
  if (!resp.ok) throw new Error(`Pinecone error: ${await resp.text()}`);
  return await resp.json();
}
|
|
33
|
+
|
|
34
|
+
/**
 * Convert a Pinecone response into point objects.
 *
 * Handles both response shapes produced by executeQuery: a query response
 * (`matches` array) and a fetch-by-id response (`vectors` object keyed by
 * id). Coordinates come from the first three vector values, falling back
 * to `x`/`y`/`z` metadata and then 0. Metadata keys are spread last, so
 * they win on a name clash.
 *
 * @param {object} response - Parsed Pinecone response body.
 * @returns {object[]} One point per match / fetched vector.
 */
export function toPoints(response) {
  const records = response.matches || Object.values(response.vectors || {});
  return records.map(m => ({
    id: m.id,
    score: m.score,
    x: m.values?.[0] ?? m.metadata?.x ?? 0,
    y: m.values?.[1] ?? m.metadata?.y ?? 0,
    z: m.values?.[2] ?? m.metadata?.z ?? 0,
    vector: m.values,
    ...m.metadata,
  }));
}
|
|
@@ -0,0 +1,62 @@
|
|
|
1
|
+
"""Pinecone result conversion and filter building."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from typing import Any
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
def to_points(response: dict[str, Any]) -> list[dict[str, Any]]:
    """Convert Pinecone response to points format.

    Accepts a query response (``matches`` list) or, when there are no
    matches, a fetch-by-id response (``vectors`` mapping keyed by id).
    ``values`` and ``metadata`` entries that are missing or ``None`` are
    treated as empty.

    Coordinates come from the first three vector values; without values
    they fall back to the ``x``/``y``/``z`` metadata keys (default 0).
    Metadata is merged last, so metadata keys win on a name clash.

    Args:
        response: Parsed Pinecone response.

    Returns:
        List of point dicts.
    """
    records = response.get("matches") or list((response.get("vectors") or {}).values())

    points = []
    for m in records:
        point: dict[str, Any] = {"id": m.get("id", "")}

        if "score" in m:
            point["score"] = m["score"]

        # ``or`` also covers keys that are present but None.
        values = m.get("values") or []
        metadata = m.get("metadata") or {}

        if values:
            point["x"] = float(values[0])
            point["y"] = float(values[1]) if len(values) > 1 else 0
            point["z"] = float(values[2]) if len(values) > 2 else 0
            point["vector"] = values
        else:
            point["x"] = float(metadata.get("x", 0))
            point["y"] = float(metadata.get("y", 0))
            point["z"] = float(metadata.get("z", 0))

        point.update(metadata)
        points.append(point)

    return points
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
def build_filter(conditions: list[tuple[str, str, Any]]) -> dict[str, Any]:
    """Build Pinecone filter from conditions.

    Pinecone uses MongoDB-style operators: $eq, $ne, $gt, $gte, $lt, $lte, $in, $nin

    Each condition is a ``(field, operator, value)`` triple. Conditions
    whose operator is not recognised are skipped. A scalar value given with
    the ``:`` operator is wrapped in a list.

    When every field appears once, the result is a flat
    ``{field: {op: value}}`` mapping. When a field is constrained more than
    once (e.g. a lower and an upper bound), all clauses are combined under
    ``$and`` so that none of them overwrites another.

    Returns:
        The filter dict; empty when no condition is usable.
    """
    op_map = {
        "=": "$eq",
        "!=": "$ne",
        ">": "$gt",
        ">=": "$gte",
        "<": "$lt",
        "<=": "$lte",
        ":": "$in",  # Array contains
    }

    clauses: list[tuple[str, dict[str, Any]]] = []
    for field, op, value in conditions:
        pinecone_op = op_map.get(op)
        if not pinecone_op:
            continue
        if pinecone_op == "$in" and not isinstance(value, list):
            value = [value]
        clauses.append((field, {pinecone_op: value}))

    fields = [field for field, _ in clauses]
    if len(set(fields)) == len(fields):
        # No repeated field: plain mapping.
        return dict(clauses)

    # Repeated field: keep every clause by AND-ing them explicitly.
    return {"$and": [{field: clause} for field, clause in clauses]}
|