eigenlake 0.2.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,10 @@
1
+ __pycache__/
2
+ *.py[cod]
3
+ .DS_Store
4
+ .venv/
5
+ .venv-*/
6
+ .venv-docs/
7
+ build/
8
+ dist/
9
+ *.egg-info/
10
+ site/
@@ -0,0 +1,98 @@
1
+ Metadata-Version: 2.4
2
+ Name: eigenlake
3
+ Version: 0.2.0
4
+ Summary: Python SDK for EigenLake Cloud
5
+ Project-URL: Homepage, https://eigenlake.dev
6
+ Project-URL: Documentation, https://docs.eigenlake.dev
7
+ Project-URL: Repository, https://github.com/EigenLake-Org/eigenlake-client
8
+ Project-URL: Issues, https://github.com/EigenLake-Org/eigenlake-client/issues
9
+ Author: EigenLake
10
+ Keywords: agent,clustering,eigenlake,embeddings,vector-search
11
+ Classifier: Development Status :: 3 - Alpha
12
+ Classifier: Intended Audience :: Developers
13
+ Classifier: Programming Language :: Python :: 3
14
+ Classifier: Programming Language :: Python :: 3 :: Only
15
+ Classifier: Programming Language :: Python :: 3.10
16
+ Classifier: Programming Language :: Python :: 3.11
17
+ Classifier: Programming Language :: Python :: 3.12
18
+ Classifier: Programming Language :: Python :: 3.13
19
+ Classifier: Topic :: Software Development :: Libraries :: Python Modules
20
+ Classifier: Typing :: Typed
21
+ Requires-Python: >=3.10
22
+ Requires-Dist: httpx>=0.27.0
23
+ Provides-Extra: docs
24
+ Requires-Dist: mkdocs-material>=9.5.0; extra == 'docs'
25
+ Requires-Dist: mkdocstrings[python]>=0.25.0; extra == 'docs'
26
+ Requires-Dist: pymdown-extensions>=10.0.0; extra == 'docs'
27
+ Description-Content-Type: text/markdown
28
+
29
+ # EigenLake Python Client
30
+
31
+ Python SDK for EigenLake Cloud.
32
+
33
+ ## Install
34
+
35
+ ```bash
36
+ pip install eigenlake
37
+ ```
38
+
39
+ ## Quickstart
40
+
41
+ ```python
42
+ import eigenlake
43
+ from eigenlake import schema as s
44
+
45
+ with eigenlake.connect(
46
+ url="https://api.eigenlake.dev",
47
+ api_key="<sk_sbx_your_api_key_here>",
48
+ ) as client:
49
+ schema, index_options = (
50
+ s.SchemaBuilder(additional_properties=False)
51
+ .add("document_id", s.string(required=True, filterable=True))
52
+ .add("text", s.string(filterable=False))
53
+ .add("created_at", s.datetime(filterable=True))
54
+ .build()
55
+ )
56
+
57
+ idx = client.indexes.create_or_get(
58
+ namespace="demo-namespace",
59
+ index="demo-index",
60
+ dimensions=128,
61
+ schema=schema,
62
+ index_options=index_options,
63
+ )
64
+
65
+ record_id = idx.records.add(
66
+ properties={"document_id": "doc-1", "text": "hello"},
67
+ vector=[0.1] * 128,
68
+ )
69
+
70
+ result = idx.search.nearest(
71
+ vector=[0.1] * 128,
72
+ limit=3,
73
+ )
74
+ print(record_id, result)
75
+ ```
76
+
77
+ ## Agent Query
78
+
79
+ ```python
80
+ import eigenlake
81
+
82
+ with eigenlake.connect(url="https://api.eigenlake.dev", api_key="<sk_sbx_your_api_key_here>") as client:
83
+ idx = client.indexes.open(namespace="demo-automotive", index="automotive-fault-clustering")
84
+ result = idx.agent.query("show me recent battery failures")
85
+ print(result["filter"])
86
+ ```
87
+
88
+ ## Docs
89
+
90
+ - Documentation source: `docs/`
91
+ - Docs site config: `mkdocs.yml`
92
+
93
+ ```bash
94
+ pip install -e ".[docs]"
95
+ mkdocs serve
96
+ ```
97
+
98
+ Docs will be available at `http://127.0.0.1:8000`.
@@ -0,0 +1,70 @@
1
+ # EigenLake Python Client
2
+
3
+ Python SDK for EigenLake Cloud.
4
+
5
+ ## Install
6
+
7
+ ```bash
8
+ pip install eigenlake
9
+ ```
10
+
11
+ ## Quickstart
12
+
13
+ ```python
14
+ import eigenlake
15
+ from eigenlake import schema as s
16
+
17
+ with eigenlake.connect(
18
+ url="https://api.eigenlake.dev",
19
+ api_key="<sk_sbx_your_api_key_here>",
20
+ ) as client:
21
+ schema, index_options = (
22
+ s.SchemaBuilder(additional_properties=False)
23
+ .add("document_id", s.string(required=True, filterable=True))
24
+ .add("text", s.string(filterable=False))
25
+ .add("created_at", s.datetime(filterable=True))
26
+ .build()
27
+ )
28
+
29
+ idx = client.indexes.create_or_get(
30
+ namespace="demo-namespace",
31
+ index="demo-index",
32
+ dimensions=128,
33
+ schema=schema,
34
+ index_options=index_options,
35
+ )
36
+
37
+ record_id = idx.records.add(
38
+ properties={"document_id": "doc-1", "text": "hello"},
39
+ vector=[0.1] * 128,
40
+ )
41
+
42
+ result = idx.search.nearest(
43
+ vector=[0.1] * 128,
44
+ limit=3,
45
+ )
46
+ print(record_id, result)
47
+ ```
48
+
49
+ ## Agent Query
50
+
51
+ ```python
52
+ import eigenlake
53
+
54
+ with eigenlake.connect(url="https://api.eigenlake.dev", api_key="<sk_sbx_your_api_key_here>") as client:
55
+ idx = client.indexes.open(namespace="demo-automotive", index="automotive-fault-clustering")
56
+ result = idx.agent.query("show me recent battery failures")
57
+ print(result["filter"])
58
+ ```
59
+
60
+ ## Docs
61
+
62
+ - Documentation source: `docs/`
63
+ - Docs site config: `mkdocs.yml`
64
+
65
+ ```bash
66
+ pip install -e ".[docs]"
67
+ mkdocs serve
68
+ ```
69
+
70
+ Docs will be available at `http://127.0.0.1:8000`.
@@ -0,0 +1 @@
1
+ docs.eigenlake.dev
@@ -0,0 +1,31 @@
1
+ # Client API Reference
2
+
3
+ ## Surface Map
4
+
5
+ - `eigenlake.connect(...)`
6
+ - `client.indexes.create_or_get(...)`
7
+ - `client.indexes.open(...)`
8
+ - `client.indexes.ref(...)`
9
+ - `index.records.*`
10
+ - `index.search.*`
11
+ - `index.search.cluster(...)`
12
+ - `index.agent.query(...)`
13
+ - `index.settings.*`
14
+ - `index.manage.*`
15
+ - `index.batch.with_size(...)`
16
+
17
+ ## Top-level Module
18
+
19
+ ::: eigenlake
20
+
21
+ ## Client and Namespaces
22
+
23
+ ::: eigenlake.client
24
+
25
+ ## Schema Builder
26
+
27
+ ::: eigenlake.schema
28
+
29
+ ## Errors
30
+
31
+ ::: eigenlake.errors
@@ -0,0 +1,130 @@
1
+ # Getting Started
2
+
3
+ ## 1. Connect to EigenLake
4
+
5
+ ```python
6
+ import eigenlake
7
+
8
+ client = eigenlake.connect(
9
+ url="https://api.eigenlake.dev/",
10
+ api_key="<sk_sbx_your_api_key_here>",
11
+ )
12
+ ```
13
+
14
+ ## 2. Define schema and index options
15
+
16
+ ```python
17
+ from eigenlake import schema as s
18
+ ```
19
+
20
+ ```python
21
+ schema, index_options = (
22
+ s.SchemaBuilder(additional_properties=False)
23
+ .add("document_id", s.string(required=True, filterable=True))
24
+ .add("document_title", s.string(filterable=True))
25
+ .add("chunk_number", s.integer(filterable=True))
26
+ .add("document_url", s.string(format="uri", filterable=True))
27
+ .add("created_at", s.datetime(filterable=True))
28
+ .add("tags", s.array(s.string(), filterable=False, max_items=20))
29
+ .build()
30
+ )
31
+ ```
32
+
33
+ ## 3. Create or open an index
34
+
35
+ ```python
36
+ idx = client.indexes.create_or_get(
37
+ namespace="demo-namespace",
38
+ index="demo-index",
39
+ dimensions=128,
40
+ schema=schema,
41
+ index_options=index_options,
42
+ )
43
+ ```
44
+
45
+ Or open an existing index:
46
+
47
+ ```python
48
+ idx = client.indexes.open(
49
+ namespace="demo-namespace",
50
+ index="demo-index",
51
+ )
52
+ ```
53
+
54
+ ## 4. Insert records
55
+
56
+ ```python
57
+ record_id = idx.records.add(
58
+ properties={"document_id": "doc-1", "text": "hello"},
59
+ vector=[0.1] * 128,
60
+ )
61
+ print("inserted:", record_id)
62
+ ```
63
+
64
+ Bulk insert:
65
+
66
+ ```python
67
+ result = idx.records.add_many(
68
+ [
69
+ {"id": "doc-2", "properties": {"document_id": "doc-2", "text": "hello 2"}, "vector": [0.2] * 128},
70
+ {"id": "doc-3", "properties": {"document_id": "doc-3", "text": "hello 3"}, "vector": [0.3] * 128},
71
+ ],
72
+ on_error="continue",
73
+ )
74
+
75
+ print("inserted:", len(result))
76
+ print("errors:", result.number_errors)
77
+ ```
78
+
79
+ ## 5. Search and read
80
+
81
+ ```python
82
+ result = idx.search.nearest(
83
+ vector=[0.1] * 128,
84
+ limit=5,
85
+ )
86
+ print(result)
87
+ ```
88
+
89
+ Fetch by id:
90
+
91
+ ```python
92
+ obj = idx.records.get("doc-1")
93
+ print(obj)
94
+ ```
95
+
96
+ ## 6. Cluster matching records
97
+
98
+ ```python
99
+ clusters = idx.search.cluster(
100
+ filter={"document_id": {"$eq": "doc-1"}},
101
+ limit=1000,
102
+ num_clusters=2,
103
+ )
104
+ print(clusters["records_clustered"])
105
+ ```
106
+
107
+ For natural language requests, use agent mode:
108
+
109
+ ```python
110
+ answer = idx.agent.query(
111
+ "show me recent failures",
112
+ mode="auto",
113
+ )
114
+ print(answer["action"])
115
+ ```
116
+
117
+ See [Clustering and Agent Queries](guides/clustering-agent.md) for a full failure-analysis example.
118
+
119
+ ## 7. Close client
120
+
121
+ ```python
122
+ client.close()
123
+ ```
124
+
125
+ Use context manager to close automatically:
126
+
127
+ ```python
128
+ with eigenlake.connect(url="https://api.eigenlake.dev/", api_key="<sk_sbx_your_api_key_here>") as client:
129
+ print(client.ready())
130
+ ```
@@ -0,0 +1,151 @@
1
+ # Clustering and Agent Queries
2
+
3
+ EigenLake can group matching records into clusters directly from an index. This is useful for operational questions such as:
4
+
5
+ ```text
6
+ show me recent battery failures
7
+ ```
8
+
9
+ The low-level API is explicit: you provide filters, limits, and clustering options. Agent mode sits one level above that: it inspects the natural language query, builds schema-aware filters for common cases such as recent failures, infers useful summary text fields, and decides whether to run clustering or return filtered records.
10
+
11
+ ## Index Schema
12
+
13
+ Agent mode can only infer filters for fields that exist in the index schema. For an automotive failure analysis demo, define filterable fields such as `system`, `status`, and `created_at`, plus descriptive string fields for summaries:
14
+
15
+ ```python
16
+ from eigenlake import schema as s
17
+
18
+ schema, index_options = (
19
+ s.SchemaBuilder(additional_properties=False)
20
+ .add("vehicle_id", s.string(required=True, filterable=True))
21
+ .add("model", s.string(filterable=True))
22
+ .add("system", s.string(filterable=True, enum=["battery", "charging", "brake", "powertrain"]))
23
+ .add("status", s.string(filterable=True, enum=["ok", "warning", "failure"]))
24
+ .add("severity", s.string(filterable=True, enum=["low", "medium", "high", "critical"]))
25
+ .add("fault_code", s.string(filterable=True))
26
+ .add("symptom", s.string(filterable=False))
27
+ .add("repair_note", s.string(filterable=False))
28
+ .add("created_at", s.datetime(filterable=True))
29
+ .build()
30
+ )
31
+
32
+ idx = client.indexes.create_or_get(
33
+ namespace="demo-automotive",
34
+ index="vehicle-failures",
35
+ dimensions=128,
36
+ schema=schema,
37
+ index_options=index_options,
38
+ )
39
+ ```
40
+
41
+ Use stable, application-level IDs for records. If you do not have UUIDs, use the SDK's `id` field with your own string ID, or set `record_id_property` when creating the index.
42
+
43
+ ## Low-Level Clustering
44
+
45
+ Use `idx.search.cluster(...)` when you already know the filter and clustering settings.
46
+
47
+ ```python
48
+ recent_failure_filter = {
49
+ "system": {"$eq": "battery"},
50
+ "status": {"$in": ["failure"]},
51
+ "created_at": {"$gte": "<recent-start-iso8601>"},
52
+ }
53
+
54
+ clusters = idx.search.cluster(
55
+ filter=recent_failure_filter,
56
+ limit=1000,
57
+ algorithm="kmeans",
58
+ num_clusters=3,
59
+ distance_metric="cosine",
60
+ representatives_per_cluster=2,
61
+ )
62
+
63
+ for cluster in clusters["clusters"]:
64
+ print(cluster["cluster_id"], cluster["count"], cluster["summary"])
65
+ for representative in cluster["representatives"]:
66
+ print(" ", representative["uuid"], representative["properties"])
67
+ ```
68
+
69
+ Typical response fields:
70
+
71
+ - `backend`: currently `lambda` in production deployments
72
+ - `algorithm`: currently `kmeans`
73
+ - `distance_metric`: `cosine` or `euclidean`
74
+ - `records_clustered`: number of records included after filtering
75
+ - `clusters`: cluster summaries, counts, centroids, representative IDs, and representative records
76
+
77
+ If `num_clusters` is omitted, the API chooses a small default based on the number of matching records.
78
+
79
+ ## Agent Mode
80
+
81
+ Use `idx.agent.query(...)` when the caller gives a natural language request and you want EigenLake to choose the action.
82
+
83
+ ```python
84
+ result = idx.agent.query("show me recent battery failures")
85
+
86
+ print(result["action"])
87
+ print(result["filter"])
88
+
89
+ for cluster in result["clusters"]:
90
+ print(cluster["count"], cluster["summary"])
91
+ ```
92
+
93
+ For this schema, the agent infers a filter similar to:
94
+
95
+ ```python
96
+ {
97
+ "status": {"$in": ["failure"]},
98
+ "created_at": {"$gte": "<recent-start-iso8601>"},
99
+ "system": {"$eq": "battery"},
100
+ }
101
+ ```
102
+
103
+ It also infers summary fields such as `fault_code`, `symptom`, and `repair_note`.
104
+
105
+ In `mode="auto"`, the agent currently uses simple, deterministic query hints. Queries containing clustering or failure-analysis language are routed to clustering. Other queries are routed to filtered record retrieval.
106
+
107
+ You can force behavior:
108
+
109
+ ```python
110
+ idx.agent.query("recent failures", mode="cluster")
111
+ idx.agent.query("recent failures", mode="filter")
112
+ ```
113
+
114
+ Advanced overrides are still available when a schema uses unusual field names:
115
+
116
+ ```python
117
+ idx.agent.query(
118
+ "show me recent failures",
119
+ failure_field="outcome",
120
+ recent_days=30,
121
+ text_fields=["description", "resolution"],
122
+ )
123
+ ```
124
+
125
+ ## Status
126
+
127
+ Clustering is synchronous today. The API returns only after Lambda clustering completes.
128
+
129
+ ```python
130
+ def clustering_status(result: dict) -> dict:
131
+ return {
132
+ "status": "completed",
133
+ "backend": result.get("backend"),
134
+ "records_clustered": result.get("records_clustered"),
135
+ "cluster_count": len(result.get("clusters") or []),
136
+ }
137
+
138
+ print(clustering_status(clusters))
139
+ ```
140
+
141
+ There is no queued job ID yet for clustering. If `backend` is `lambda`, the API invoked Lambda and waited for the response.
142
+
143
+ Future compute backends may include GPUs, Spark, and automatic compute selection based on request requirements.
144
+
145
+ ## Full Demo
146
+
147
+ See the clustering demo notebook in the examples repository:
148
+
149
+ ```text
150
+ demos/clustering_agent/query_failures_clustering_demo.ipynb
151
+ ```
@@ -0,0 +1,133 @@
1
+ # Common Workflows
2
+
3
+ ## Open Index Handle
4
+
5
+ ```python
6
+ idx = client.indexes.open(
7
+ namespace="demo-namespace",
8
+ index="demo-index",
9
+ )
10
+ ```
11
+
12
+ ## Insert One Record
13
+
14
+ ```python
15
+ record_id = idx.records.add(
16
+ id="doc-1",
17
+ properties={"document_id": "doc-1", "text": "hello"},
18
+ vector=[0.1] * 128,
19
+ )
20
+ ```
21
+
22
+ ## Insert Many Records
23
+
24
+ ```python
25
+ result = idx.records.add_many(
26
+ [
27
+ {"id": "doc-2", "properties": {"document_id": "doc-2"}, "vector": [0.2] * 128},
28
+ {"id": "doc-3", "properties": {"document_id": "doc-3"}, "vector": [0.3] * 128},
29
+ ],
30
+ on_error="continue",
31
+ )
32
+
33
+ print("inserted:", len(result))
34
+ print("error_count:", result.number_errors)
35
+ print("failed_records:", result.failed_records)
36
+ ```
37
+
38
+ ## High-Throughput Batch Helper
39
+
40
+ ```python
41
+ with idx.batch.with_size(batch_size=200, max_workers=4, on_error="continue") as batch:
42
+ batch.add(id="doc-10", properties={"document_id": "doc-10"}, vector=[0.1] * 128)
43
+ batch.add(id="doc-11", properties={"document_id": "doc-11"}, vector=[0.2] * 128)
44
+
45
+ print("error_count:", batch.number_errors)
46
+ ```
47
+
48
+ ## Nearest Search
49
+
50
+ ```python
51
+ search_result = idx.search.nearest(
52
+ vector=[0.1] * 128,
53
+ limit=10,
54
+ filter={"document_id": {"$eq": "doc-1"}},
55
+ )
56
+ ```
57
+
58
+ ## Cluster Search Results
59
+
60
+ ```python
61
+ clusters = idx.search.cluster(
62
+ filter={"status": {"$in": ["failed", "failure", "error"]}},
63
+ limit=1000,
64
+ num_clusters=3,
65
+ distance_metric="cosine",
66
+ representatives_per_cluster=2,
67
+ )
68
+
69
+ for cluster in clusters["clusters"]:
70
+ print(cluster["count"], cluster["summary"])
71
+ ```
72
+
73
+ ## Agent Query
74
+
75
+ ```python
76
+ result = idx.agent.query(
77
+ "show me recent failures",
78
+ mode="auto",
79
+ )
80
+
81
+ print(result["action"])
82
+ print(result["filter"])
83
+ ```
84
+
85
+ ## List and Iterate
86
+
87
+ ```python
88
+ page = idx.search.list(limit=100, offset=0)
89
+ print(page)
90
+
91
+ for obj in idx.search.iterate(page_size=500):
92
+ print(obj)
93
+ ```
94
+
95
+ ## Update and Replace
96
+
97
+ ```python
98
+ idx.records.update(
99
+ id="doc-1",
100
+ properties={"text": "updated"},
101
+ )
102
+
103
+ idx.records.replace(
104
+ id="doc-1",
105
+ properties={"document_id": "doc-1", "text": "replaced"},
106
+ vector=[0.4] * 128,
107
+ )
108
+ ```
109
+
110
+ ## Delete
111
+
112
+ ```python
113
+ idx.records.remove("doc-1")
114
+
115
+ job = idx.records.remove_many(
116
+ filter={"document_id": {"$in": ["doc-2", "doc-3"]}},
117
+ background=True,
118
+ )
119
+ print(job)
120
+ ```
121
+
122
+ ## Index Metadata and Maintenance
123
+
124
+ ```python
125
+ print(idx.settings.dimensions())
126
+ print(idx.settings.schema())
127
+ print(idx.settings.shards())
128
+
129
+ idx.manage.remove_by_filter(
130
+ filter={"created_at": {"$lt": "2024-01-01T00:00:00Z"}},
131
+ background=True,
132
+ )
133
+ ```