eigenlake 0.2.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- eigenlake-0.2.0/.gitignore +10 -0
- eigenlake-0.2.0/PKG-INFO +98 -0
- eigenlake-0.2.0/README.md +70 -0
- eigenlake-0.2.0/docs/CNAME +1 -0
- eigenlake-0.2.0/docs/api/client.md +31 -0
- eigenlake-0.2.0/docs/getting-started.md +130 -0
- eigenlake-0.2.0/docs/guides/clustering-agent.md +151 -0
- eigenlake-0.2.0/docs/guides/common-workflows.md +133 -0
- eigenlake-0.2.0/docs/guides/deployment.md +93 -0
- eigenlake-0.2.0/docs/guides/errors-and-retries.md +51 -0
- eigenlake-0.2.0/docs/guides/schema-builder.md +77 -0
- eigenlake-0.2.0/docs/index.md +57 -0
- eigenlake-0.2.0/mkdocs.yml +41 -0
- eigenlake-0.2.0/pyproject.toml +63 -0
- eigenlake-0.2.0/src/eigenlake/__init__.py +46 -0
- eigenlake-0.2.0/src/eigenlake/client.py +532 -0
- eigenlake-0.2.0/src/eigenlake/errors.py +29 -0
- eigenlake-0.2.0/src/eigenlake/py.typed +1 -0
- eigenlake-0.2.0/src/eigenlake/schema.py +209 -0
- eigenlake-0.2.0/src/eigenlake/transport.py +104 -0
eigenlake-0.2.0/PKG-INFO
ADDED
|
@@ -0,0 +1,98 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: eigenlake
|
|
3
|
+
Version: 0.2.0
|
|
4
|
+
Summary: Python SDK for EigenLake Cloud
|
|
5
|
+
Project-URL: Homepage, https://eigenlake.dev
|
|
6
|
+
Project-URL: Documentation, https://docs.eigenlake.dev
|
|
7
|
+
Project-URL: Repository, https://github.com/EigenLake-Org/eigenlake-client
|
|
8
|
+
Project-URL: Issues, https://github.com/EigenLake-Org/eigenlake-client/issues
|
|
9
|
+
Author: EigenLake
|
|
10
|
+
Keywords: agent,clustering,eigenlake,embeddings,vector-search
|
|
11
|
+
Classifier: Development Status :: 3 - Alpha
|
|
12
|
+
Classifier: Intended Audience :: Developers
|
|
13
|
+
Classifier: Programming Language :: Python :: 3
|
|
14
|
+
Classifier: Programming Language :: Python :: 3 :: Only
|
|
15
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
16
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
17
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
18
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
19
|
+
Classifier: Topic :: Software Development :: Libraries :: Python Modules
|
|
20
|
+
Classifier: Typing :: Typed
|
|
21
|
+
Requires-Python: >=3.10
|
|
22
|
+
Requires-Dist: httpx>=0.27.0
|
|
23
|
+
Provides-Extra: docs
|
|
24
|
+
Requires-Dist: mkdocs-material>=9.5.0; extra == 'docs'
|
|
25
|
+
Requires-Dist: mkdocstrings[python]>=0.25.0; extra == 'docs'
|
|
26
|
+
Requires-Dist: pymdown-extensions>=10.0.0; extra == 'docs'
|
|
27
|
+
Description-Content-Type: text/markdown
|
|
28
|
+
|
|
29
|
+
# EigenLake Python Client
|
|
30
|
+
|
|
31
|
+
Python SDK for EigenLake Cloud.
|
|
32
|
+
|
|
33
|
+
## Install
|
|
34
|
+
|
|
35
|
+
```bash
|
|
36
|
+
pip install eigenlake
|
|
37
|
+
```
|
|
38
|
+
|
|
39
|
+
## Quickstart
|
|
40
|
+
|
|
41
|
+
```python
|
|
42
|
+
import eigenlake
|
|
43
|
+
from eigenlake import schema as s
|
|
44
|
+
|
|
45
|
+
with eigenlake.connect(
|
|
46
|
+
url="https://api.eigenlake.dev",
|
|
47
|
+
api_key="<sk_sbx_your_api_key_here>",
|
|
48
|
+
) as client:
|
|
49
|
+
schema, index_options = (
|
|
50
|
+
s.SchemaBuilder(additional_properties=False)
|
|
51
|
+
.add("document_id", s.string(required=True, filterable=True))
|
|
52
|
+
.add("text", s.string(filterable=False))
|
|
53
|
+
.add("created_at", s.datetime(filterable=True))
|
|
54
|
+
.build()
|
|
55
|
+
)
|
|
56
|
+
|
|
57
|
+
idx = client.indexes.create_or_get(
|
|
58
|
+
namespace="demo-namespace",
|
|
59
|
+
index="demo-index",
|
|
60
|
+
dimensions=128,
|
|
61
|
+
schema=schema,
|
|
62
|
+
index_options=index_options,
|
|
63
|
+
)
|
|
64
|
+
|
|
65
|
+
record_id = idx.records.add(
|
|
66
|
+
properties={"document_id": "doc-1", "text": "hello"},
|
|
67
|
+
vector=[0.1] * 128,
|
|
68
|
+
)
|
|
69
|
+
|
|
70
|
+
result = idx.search.nearest(
|
|
71
|
+
vector=[0.1] * 128,
|
|
72
|
+
limit=3,
|
|
73
|
+
)
|
|
74
|
+
print(record_id, result)
|
|
75
|
+
```
|
|
76
|
+
|
|
77
|
+
## Agent Query
|
|
78
|
+
|
|
79
|
+
```python
|
|
80
|
+
import eigenlake
|
|
81
|
+
|
|
82
|
+
with eigenlake.connect(url="https://api.eigenlake.dev", api_key="<sk_sbx_your_api_key_here>") as client:
|
|
83
|
+
idx = client.indexes.open(namespace="demo-automotive", index="automotive-fault-clustering")
|
|
84
|
+
result = idx.agent.query("show me recent battery failures")
|
|
85
|
+
print(result["filter"])
|
|
86
|
+
```
|
|
87
|
+
|
|
88
|
+
## Docs
|
|
89
|
+
|
|
90
|
+
- Documentation source: `docs/`
|
|
91
|
+
- Docs site config: `mkdocs.yml`
|
|
92
|
+
|
|
93
|
+
```bash
|
|
94
|
+
pip install -e ".[docs]"
|
|
95
|
+
mkdocs serve
|
|
96
|
+
```
|
|
97
|
+
|
|
98
|
+
Docs will be available at `http://127.0.0.1:8000`.
|
|
@@ -0,0 +1,70 @@
|
|
|
1
|
+
# EigenLake Python Client
|
|
2
|
+
|
|
3
|
+
Python SDK for EigenLake Cloud.
|
|
4
|
+
|
|
5
|
+
## Install
|
|
6
|
+
|
|
7
|
+
```bash
|
|
8
|
+
pip install eigenlake
|
|
9
|
+
```
|
|
10
|
+
|
|
11
|
+
## Quickstart
|
|
12
|
+
|
|
13
|
+
```python
|
|
14
|
+
import eigenlake
|
|
15
|
+
from eigenlake import schema as s
|
|
16
|
+
|
|
17
|
+
with eigenlake.connect(
|
|
18
|
+
url="https://api.eigenlake.dev",
|
|
19
|
+
api_key="<sk_sbx_your_api_key_here>",
|
|
20
|
+
) as client:
|
|
21
|
+
schema, index_options = (
|
|
22
|
+
s.SchemaBuilder(additional_properties=False)
|
|
23
|
+
.add("document_id", s.string(required=True, filterable=True))
|
|
24
|
+
.add("text", s.string(filterable=False))
|
|
25
|
+
.add("created_at", s.datetime(filterable=True))
|
|
26
|
+
.build()
|
|
27
|
+
)
|
|
28
|
+
|
|
29
|
+
idx = client.indexes.create_or_get(
|
|
30
|
+
namespace="demo-namespace",
|
|
31
|
+
index="demo-index",
|
|
32
|
+
dimensions=128,
|
|
33
|
+
schema=schema,
|
|
34
|
+
index_options=index_options,
|
|
35
|
+
)
|
|
36
|
+
|
|
37
|
+
record_id = idx.records.add(
|
|
38
|
+
properties={"document_id": "doc-1", "text": "hello"},
|
|
39
|
+
vector=[0.1] * 128,
|
|
40
|
+
)
|
|
41
|
+
|
|
42
|
+
result = idx.search.nearest(
|
|
43
|
+
vector=[0.1] * 128,
|
|
44
|
+
limit=3,
|
|
45
|
+
)
|
|
46
|
+
print(record_id, result)
|
|
47
|
+
```
|
|
48
|
+
|
|
49
|
+
## Agent Query
|
|
50
|
+
|
|
51
|
+
```python
|
|
52
|
+
import eigenlake
|
|
53
|
+
|
|
54
|
+
with eigenlake.connect(url="https://api.eigenlake.dev", api_key="<sk_sbx_your_api_key_here>") as client:
|
|
55
|
+
idx = client.indexes.open(namespace="demo-automotive", index="automotive-fault-clustering")
|
|
56
|
+
result = idx.agent.query("show me recent battery failures")
|
|
57
|
+
print(result["filter"])
|
|
58
|
+
```
|
|
59
|
+
|
|
60
|
+
## Docs
|
|
61
|
+
|
|
62
|
+
- Documentation source: `docs/`
|
|
63
|
+
- Docs site config: `mkdocs.yml`
|
|
64
|
+
|
|
65
|
+
```bash
|
|
66
|
+
pip install -e ".[docs]"
|
|
67
|
+
mkdocs serve
|
|
68
|
+
```
|
|
69
|
+
|
|
70
|
+
Docs will be available at `http://127.0.0.1:8000`.
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
docs.eigenlake.dev
|
|
@@ -0,0 +1,31 @@
|
|
|
1
|
+
# Client API Reference
|
|
2
|
+
|
|
3
|
+
## Surface Map
|
|
4
|
+
|
|
5
|
+
- `eigenlake.connect(...)`
|
|
6
|
+
- `client.indexes.create_or_get(...)`
|
|
7
|
+
- `client.indexes.open(...)`
|
|
8
|
+
- `client.indexes.ref(...)`
|
|
9
|
+
- `index.records.*`
|
|
10
|
+
- `index.search.*`
|
|
11
|
+
- `index.search.cluster(...)`
|
|
12
|
+
- `index.agent.query(...)`
|
|
13
|
+
- `index.settings.*`
|
|
14
|
+
- `index.manage.*`
|
|
15
|
+
- `index.batch.with_size(...)`
|
|
16
|
+
|
|
17
|
+
## Top-level Module
|
|
18
|
+
|
|
19
|
+
::: eigenlake
|
|
20
|
+
|
|
21
|
+
## Client and Namespaces
|
|
22
|
+
|
|
23
|
+
::: eigenlake.client
|
|
24
|
+
|
|
25
|
+
## Schema Builder
|
|
26
|
+
|
|
27
|
+
::: eigenlake.schema
|
|
28
|
+
|
|
29
|
+
## Errors
|
|
30
|
+
|
|
31
|
+
::: eigenlake.errors
|
|
@@ -0,0 +1,130 @@
|
|
|
1
|
+
# Getting Started
|
|
2
|
+
|
|
3
|
+
## 1. Connect to EigenLake
|
|
4
|
+
|
|
5
|
+
```python
|
|
6
|
+
import eigenlake
|
|
7
|
+
|
|
8
|
+
client = eigenlake.connect(
|
|
9
|
+
    url="https://api.eigenlake.dev",
|
|
10
|
+
api_key="<sk_sbx_your_api_key_here>",
|
|
11
|
+
)
|
|
12
|
+
```
|
|
13
|
+
|
|
14
|
+
## 2. Define schema and index options
|
|
15
|
+
|
|
16
|
+
```python
|
|
17
|
+
from eigenlake import schema as s
|
|
18
|
+
```
|
|
19
|
+
|
|
20
|
+
```python
|
|
21
|
+
schema, index_options = (
|
|
22
|
+
s.SchemaBuilder(additional_properties=False)
|
|
23
|
+
.add("document_id", s.string(required=True, filterable=True))
|
|
24
|
+
    .add("document_title", s.string(filterable=True))
    .add("text", s.string(filterable=False))
|
|
25
|
+
.add("chunk_number", s.integer(filterable=True))
|
|
26
|
+
.add("document_url", s.string(format="uri", filterable=True))
|
|
27
|
+
.add("created_at", s.datetime(filterable=True))
|
|
28
|
+
.add("tags", s.array(s.string(), filterable=False, max_items=20))
|
|
29
|
+
.build()
|
|
30
|
+
)
|
|
31
|
+
```
|
|
32
|
+
|
|
33
|
+
## 3. Create or open an index
|
|
34
|
+
|
|
35
|
+
```python
|
|
36
|
+
idx = client.indexes.create_or_get(
|
|
37
|
+
namespace="demo-namespace",
|
|
38
|
+
index="demo-index",
|
|
39
|
+
dimensions=128,
|
|
40
|
+
schema=schema,
|
|
41
|
+
index_options=index_options,
|
|
42
|
+
)
|
|
43
|
+
```
|
|
44
|
+
|
|
45
|
+
Or open an existing index:
|
|
46
|
+
|
|
47
|
+
```python
|
|
48
|
+
idx = client.indexes.open(
|
|
49
|
+
namespace="demo-namespace",
|
|
50
|
+
index="demo-index",
|
|
51
|
+
)
|
|
52
|
+
```
|
|
53
|
+
|
|
54
|
+
## 4. Insert records
|
|
55
|
+
|
|
56
|
+
```python
|
|
57
|
+
record_id = idx.records.add(
|
|
58
|
+
properties={"document_id": "doc-1", "text": "hello"},
|
|
59
|
+
vector=[0.1] * 128,
|
|
60
|
+
)
|
|
61
|
+
print("inserted:", record_id)
|
|
62
|
+
```
|
|
63
|
+
|
|
64
|
+
Bulk insert:
|
|
65
|
+
|
|
66
|
+
```python
|
|
67
|
+
result = idx.records.add_many(
|
|
68
|
+
[
|
|
69
|
+
{"id": "doc-2", "properties": {"document_id": "doc-2", "text": "hello 2"}, "vector": [0.2] * 128},
|
|
70
|
+
{"id": "doc-3", "properties": {"document_id": "doc-3", "text": "hello 3"}, "vector": [0.3] * 128},
|
|
71
|
+
],
|
|
72
|
+
on_error="continue",
|
|
73
|
+
)
|
|
74
|
+
|
|
75
|
+
print("inserted:", len(result))
|
|
76
|
+
print("errors:", result.number_errors)
|
|
77
|
+
```
|
|
78
|
+
|
|
79
|
+
## 5. Search and read
|
|
80
|
+
|
|
81
|
+
```python
|
|
82
|
+
result = idx.search.nearest(
|
|
83
|
+
vector=[0.1] * 128,
|
|
84
|
+
limit=5,
|
|
85
|
+
)
|
|
86
|
+
print(result)
|
|
87
|
+
```
|
|
88
|
+
|
|
89
|
+
Fetch by id:
|
|
90
|
+
|
|
91
|
+
```python
|
|
92
|
+
obj = idx.records.get("doc-1")
|
|
93
|
+
print(obj)
|
|
94
|
+
```
|
|
95
|
+
|
|
96
|
+
## 6. Cluster matching records
|
|
97
|
+
|
|
98
|
+
```python
|
|
99
|
+
clusters = idx.search.cluster(
|
|
100
|
+
filter={"document_id": {"$eq": "doc-1"}},
|
|
101
|
+
limit=1000,
|
|
102
|
+
num_clusters=2,
|
|
103
|
+
)
|
|
104
|
+
print(clusters["records_clustered"])
|
|
105
|
+
```
|
|
106
|
+
|
|
107
|
+
For natural language requests, use agent mode:
|
|
108
|
+
|
|
109
|
+
```python
|
|
110
|
+
answer = idx.agent.query(
|
|
111
|
+
"show me recent failures",
|
|
112
|
+
mode="auto",
|
|
113
|
+
)
|
|
114
|
+
print(answer["action"])
|
|
115
|
+
```
|
|
116
|
+
|
|
117
|
+
See [Clustering and Agent Queries](guides/clustering-agent.md) for a full failure-analysis example.
|
|
118
|
+
|
|
119
|
+
## 7. Close client
|
|
120
|
+
|
|
121
|
+
```python
|
|
122
|
+
client.close()
|
|
123
|
+
```
|
|
124
|
+
|
|
125
|
+
Use context manager to close automatically:
|
|
126
|
+
|
|
127
|
+
```python
|
|
128
|
+
with eigenlake.connect(url="https://api.eigenlake.dev", api_key="<sk_sbx_your_api_key_here>") as client:
|
|
129
|
+
print(client.ready())
|
|
130
|
+
```
|
|
@@ -0,0 +1,151 @@
|
|
|
1
|
+
# Clustering and Agent Queries
|
|
2
|
+
|
|
3
|
+
EigenLake can group matching records into clusters directly from an index. This is useful for operational questions such as:
|
|
4
|
+
|
|
5
|
+
```text
|
|
6
|
+
show me recent battery failures
|
|
7
|
+
```
|
|
8
|
+
|
|
9
|
+
The low-level API is explicit: you provide filters, limits, and clustering options. Agent mode sits one level above that: it inspects the natural language query, builds schema-aware filters for common cases such as recent failures, infers useful summary text fields, and decides whether to run clustering or return filtered records.
|
|
10
|
+
|
|
11
|
+
## Index Schema
|
|
12
|
+
|
|
13
|
+
Agent mode can only infer filters for fields that exist in the index schema. For an automotive failure analysis demo, define filterable fields such as `system`, `status`, and `created_at`, plus descriptive string fields for summaries:
|
|
14
|
+
|
|
15
|
+
```python
|
|
16
|
+
from eigenlake import schema as s
|
|
17
|
+
|
|
18
|
+
schema, index_options = (
|
|
19
|
+
s.SchemaBuilder(additional_properties=False)
|
|
20
|
+
.add("vehicle_id", s.string(required=True, filterable=True))
|
|
21
|
+
.add("model", s.string(filterable=True))
|
|
22
|
+
.add("system", s.string(filterable=True, enum=["battery", "charging", "brake", "powertrain"]))
|
|
23
|
+
.add("status", s.string(filterable=True, enum=["ok", "warning", "failure"]))
|
|
24
|
+
.add("severity", s.string(filterable=True, enum=["low", "medium", "high", "critical"]))
|
|
25
|
+
.add("fault_code", s.string(filterable=True))
|
|
26
|
+
.add("symptom", s.string(filterable=False))
|
|
27
|
+
.add("repair_note", s.string(filterable=False))
|
|
28
|
+
.add("created_at", s.datetime(filterable=True))
|
|
29
|
+
.build()
|
|
30
|
+
)
|
|
31
|
+
|
|
32
|
+
idx = client.indexes.create_or_get(
|
|
33
|
+
namespace="demo-automotive",
|
|
34
|
+
index="vehicle-failures",
|
|
35
|
+
dimensions=128,
|
|
36
|
+
schema=schema,
|
|
37
|
+
index_options=index_options,
|
|
38
|
+
)
|
|
39
|
+
```
|
|
40
|
+
|
|
41
|
+
Use stable, application-level IDs for records. If you do not have UUIDs, use the SDK's `id` field with your own string ID, or set `record_id_property` when creating the index.
|
|
42
|
+
|
|
43
|
+
## Low-Level Clustering
|
|
44
|
+
|
|
45
|
+
Use `idx.search.cluster(...)` when you already know the filter and clustering settings.
|
|
46
|
+
|
|
47
|
+
```python
|
|
48
|
+
recent_failure_filter = {
|
|
49
|
+
"system": {"$eq": "battery"},
|
|
50
|
+
"status": {"$in": ["failure"]},
|
|
51
|
+
"created_at": {"$gte": "<recent-start-iso8601>"},
|
|
52
|
+
}
|
|
53
|
+
|
|
54
|
+
clusters = idx.search.cluster(
|
|
55
|
+
filter=recent_failure_filter,
|
|
56
|
+
limit=1000,
|
|
57
|
+
algorithm="kmeans",
|
|
58
|
+
num_clusters=3,
|
|
59
|
+
distance_metric="cosine",
|
|
60
|
+
representatives_per_cluster=2,
|
|
61
|
+
)
|
|
62
|
+
|
|
63
|
+
for cluster in clusters["clusters"]:
|
|
64
|
+
print(cluster["cluster_id"], cluster["count"], cluster["summary"])
|
|
65
|
+
for representative in cluster["representatives"]:
|
|
66
|
+
print(" ", representative["uuid"], representative["properties"])
|
|
67
|
+
```
|
|
68
|
+
|
|
69
|
+
Typical response fields:
|
|
70
|
+
|
|
71
|
+
- `backend`: currently `lambda` in production deployments
|
|
72
|
+
- `algorithm`: currently `kmeans`
|
|
73
|
+
- `distance_metric`: `cosine` or `euclidean`
|
|
74
|
+
- `records_clustered`: number of records included after filtering
|
|
75
|
+
- `clusters`: cluster summaries, counts, centroids, representative IDs, and representative records
|
|
76
|
+
|
|
77
|
+
If `num_clusters` is omitted, the API chooses a small default based on the number of matching records.
|
|
78
|
+
|
|
79
|
+
## Agent Mode
|
|
80
|
+
|
|
81
|
+
Use `idx.agent.query(...)` when the caller gives a natural language request and you want EigenLake to choose the action.
|
|
82
|
+
|
|
83
|
+
```python
|
|
84
|
+
result = idx.agent.query("show me recent battery failures")
|
|
85
|
+
|
|
86
|
+
print(result["action"])
|
|
87
|
+
print(result["filter"])
|
|
88
|
+
|
|
89
|
+
for cluster in result["clusters"]:
|
|
90
|
+
print(cluster["count"], cluster["summary"])
|
|
91
|
+
```
|
|
92
|
+
|
|
93
|
+
For this schema, the agent infers a filter similar to:
|
|
94
|
+
|
|
95
|
+
```python
|
|
96
|
+
{
|
|
97
|
+
"status": {"$in": ["failure"]},
|
|
98
|
+
"created_at": {"$gte": "<recent-start-iso8601>"},
|
|
99
|
+
"system": {"$eq": "battery"},
|
|
100
|
+
}
|
|
101
|
+
```
|
|
102
|
+
|
|
103
|
+
It also infers summary fields such as `fault_code`, `symptom`, and `repair_note`.
|
|
104
|
+
|
|
105
|
+
In `mode="auto"`, the agent currently uses simple, deterministic query hints. Queries containing clustering or failure-analysis language are routed to clustering. Other queries are routed to filtered record retrieval.
|
|
106
|
+
|
|
107
|
+
You can force behavior:
|
|
108
|
+
|
|
109
|
+
```python
|
|
110
|
+
idx.agent.query("recent failures", mode="cluster")
|
|
111
|
+
idx.agent.query("recent failures", mode="filter")
|
|
112
|
+
```
|
|
113
|
+
|
|
114
|
+
Advanced overrides are still available when a schema uses unusual field names:
|
|
115
|
+
|
|
116
|
+
```python
|
|
117
|
+
idx.agent.query(
|
|
118
|
+
"show me recent failures",
|
|
119
|
+
failure_field="outcome",
|
|
120
|
+
recent_days=30,
|
|
121
|
+
text_fields=["description", "resolution"],
|
|
122
|
+
)
|
|
123
|
+
```
|
|
124
|
+
|
|
125
|
+
## Status
|
|
126
|
+
|
|
127
|
+
Clustering is synchronous today. The API returns only after Lambda clustering completes.
|
|
128
|
+
|
|
129
|
+
```python
|
|
130
|
+
def clustering_status(result: dict) -> dict:
|
|
131
|
+
return {
|
|
132
|
+
"status": "completed",
|
|
133
|
+
"backend": result.get("backend"),
|
|
134
|
+
"records_clustered": result.get("records_clustered"),
|
|
135
|
+
"cluster_count": len(result.get("clusters") or []),
|
|
136
|
+
}
|
|
137
|
+
|
|
138
|
+
print(clustering_status(clusters))
|
|
139
|
+
```
|
|
140
|
+
|
|
141
|
+
There is no queued job ID yet for clustering. If `backend` is `lambda`, the API invoked Lambda and waited for the response.
|
|
142
|
+
|
|
143
|
+
Future compute backends may include GPUs, Spark, and automatic compute selection based on request requirements.
|
|
144
|
+
|
|
145
|
+
## Full Demo
|
|
146
|
+
|
|
147
|
+
See the clustering demo notebook in the examples repository:
|
|
148
|
+
|
|
149
|
+
```text
|
|
150
|
+
demos/clustering_agent/query_failures_clustering_demo.ipynb
|
|
151
|
+
```
|
|
@@ -0,0 +1,133 @@
|
|
|
1
|
+
# Common Workflows
|
|
2
|
+
|
|
3
|
+
## Open Index Handle
|
|
4
|
+
|
|
5
|
+
```python
|
|
6
|
+
idx = client.indexes.open(
|
|
7
|
+
namespace="demo-namespace",
|
|
8
|
+
index="demo-index",
|
|
9
|
+
)
|
|
10
|
+
```
|
|
11
|
+
|
|
12
|
+
## Insert One Record
|
|
13
|
+
|
|
14
|
+
```python
|
|
15
|
+
record_id = idx.records.add(
|
|
16
|
+
id="doc-1",
|
|
17
|
+
properties={"document_id": "doc-1", "text": "hello"},
|
|
18
|
+
vector=[0.1] * 128,
|
|
19
|
+
)
|
|
20
|
+
```
|
|
21
|
+
|
|
22
|
+
## Insert Many Records
|
|
23
|
+
|
|
24
|
+
```python
|
|
25
|
+
result = idx.records.add_many(
|
|
26
|
+
[
|
|
27
|
+
{"id": "doc-2", "properties": {"document_id": "doc-2"}, "vector": [0.2] * 128},
|
|
28
|
+
{"id": "doc-3", "properties": {"document_id": "doc-3"}, "vector": [0.3] * 128},
|
|
29
|
+
],
|
|
30
|
+
on_error="continue",
|
|
31
|
+
)
|
|
32
|
+
|
|
33
|
+
print("inserted:", len(result))
|
|
34
|
+
print("error_count:", result.number_errors)
|
|
35
|
+
print("failed_records:", result.failed_records)
|
|
36
|
+
```
|
|
37
|
+
|
|
38
|
+
## High-Throughput Batch Helper
|
|
39
|
+
|
|
40
|
+
```python
|
|
41
|
+
with idx.batch.with_size(batch_size=200, max_workers=4, on_error="continue") as batch:
|
|
42
|
+
batch.add(id="doc-10", properties={"document_id": "doc-10"}, vector=[0.1] * 128)
|
|
43
|
+
batch.add(id="doc-11", properties={"document_id": "doc-11"}, vector=[0.2] * 128)
|
|
44
|
+
|
|
45
|
+
print("error_count:", batch.number_errors)
|
|
46
|
+
```
|
|
47
|
+
|
|
48
|
+
## Nearest Search
|
|
49
|
+
|
|
50
|
+
```python
|
|
51
|
+
search_result = idx.search.nearest(
|
|
52
|
+
vector=[0.1] * 128,
|
|
53
|
+
limit=10,
|
|
54
|
+
filter={"document_id": {"$eq": "doc-1"}},
|
|
55
|
+
)
|
|
56
|
+
```
|
|
57
|
+
|
|
58
|
+
## Cluster Search Results
|
|
59
|
+
|
|
60
|
+
```python
|
|
61
|
+
clusters = idx.search.cluster(
|
|
62
|
+
filter={"status": {"$in": ["failed", "failure", "error"]}},
|
|
63
|
+
limit=1000,
|
|
64
|
+
num_clusters=3,
|
|
65
|
+
distance_metric="cosine",
|
|
66
|
+
representatives_per_cluster=2,
|
|
67
|
+
)
|
|
68
|
+
|
|
69
|
+
for cluster in clusters["clusters"]:
|
|
70
|
+
print(cluster["count"], cluster["summary"])
|
|
71
|
+
```
|
|
72
|
+
|
|
73
|
+
## Agent Query
|
|
74
|
+
|
|
75
|
+
```python
|
|
76
|
+
result = idx.agent.query(
|
|
77
|
+
    "show me recent failures",
|
|
78
|
+
mode="auto",
|
|
79
|
+
)
|
|
80
|
+
|
|
81
|
+
print(result["action"])
|
|
82
|
+
print(result["filter"])
|
|
83
|
+
```
|
|
84
|
+
|
|
85
|
+
## List and Iterate
|
|
86
|
+
|
|
87
|
+
```python
|
|
88
|
+
page = idx.search.list(limit=100, offset=0)
|
|
89
|
+
print(page)
|
|
90
|
+
|
|
91
|
+
for obj in idx.search.iterate(page_size=500):
|
|
92
|
+
print(obj)
|
|
93
|
+
```
|
|
94
|
+
|
|
95
|
+
## Update and Replace
|
|
96
|
+
|
|
97
|
+
```python
|
|
98
|
+
idx.records.update(
|
|
99
|
+
id="doc-1",
|
|
100
|
+
properties={"text": "updated"},
|
|
101
|
+
)
|
|
102
|
+
|
|
103
|
+
idx.records.replace(
|
|
104
|
+
id="doc-1",
|
|
105
|
+
properties={"document_id": "doc-1", "text": "replaced"},
|
|
106
|
+
vector=[0.4] * 128,
|
|
107
|
+
)
|
|
108
|
+
```
|
|
109
|
+
|
|
110
|
+
## Delete
|
|
111
|
+
|
|
112
|
+
```python
|
|
113
|
+
idx.records.remove("doc-1")
|
|
114
|
+
|
|
115
|
+
job = idx.records.remove_many(
|
|
116
|
+
filter={"document_id": {"$in": ["doc-2", "doc-3"]}},
|
|
117
|
+
background=True,
|
|
118
|
+
)
|
|
119
|
+
print(job)
|
|
120
|
+
```
|
|
121
|
+
|
|
122
|
+
## Index Metadata and Maintenance
|
|
123
|
+
|
|
124
|
+
```python
|
|
125
|
+
print(idx.settings.dimensions())
|
|
126
|
+
print(idx.settings.schema())
|
|
127
|
+
print(idx.settings.shards())
|
|
128
|
+
|
|
129
|
+
idx.manage.remove_by_filter(
|
|
130
|
+
filter={"created_at": {"$lt": "2024-01-01T00:00:00Z"}},
|
|
131
|
+
background=True,
|
|
132
|
+
)
|
|
133
|
+
```
|