overton-sdk 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- overton_sdk-0.1.0/PKG-INFO +347 -0
- overton_sdk-0.1.0/README.md +331 -0
- overton_sdk-0.1.0/overton_sdk/__init__.py +56 -0
- overton_sdk-0.1.0/overton_sdk/auth.py +37 -0
- overton_sdk-0.1.0/overton_sdk/backends/__init__.py +0 -0
- overton_sdk-0.1.0/overton_sdk/backends/base.py +49 -0
- overton_sdk-0.1.0/overton_sdk/backends/http.py +213 -0
- overton_sdk-0.1.0/overton_sdk/backends/local.py +158 -0
- overton_sdk-0.1.0/overton_sdk/client.py +123 -0
- overton_sdk-0.1.0/overton_sdk/config.py +27 -0
- overton_sdk-0.1.0/overton_sdk/exceptions.py +61 -0
- overton_sdk-0.1.0/overton_sdk/manifold.py +76 -0
- overton_sdk-0.1.0/overton_sdk/models.py +91 -0
- overton_sdk-0.1.0/overton_sdk/py.typed +0 -0
- overton_sdk-0.1.0/overton_sdk.egg-info/PKG-INFO +347 -0
- overton_sdk-0.1.0/overton_sdk.egg-info/SOURCES.txt +36 -0
- overton_sdk-0.1.0/overton_sdk.egg-info/dependency_links.txt +1 -0
- overton_sdk-0.1.0/overton_sdk.egg-info/requires.txt +11 -0
- overton_sdk-0.1.0/overton_sdk.egg-info/top_level.txt +1 -0
- overton_sdk-0.1.0/pyproject.toml +32 -0
- overton_sdk-0.1.0/setup.cfg +4 -0
- overton_sdk-0.1.0/tests/test_client_init.py +70 -0
- overton_sdk-0.1.0/tests/test_customer.py +194 -0
- overton_sdk-0.1.0/tests/test_http_backend.py +330 -0
- overton_sdk-0.1.0/tests/test_integration.py +166 -0
- overton_sdk-0.1.0/tests/test_local_backend.py +205 -0
|
@@ -0,0 +1,347 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: overton-sdk
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: Python SDK for the Overton platform.
|
|
5
|
+
Project-URL: Repository, https://github.com/Overton-Bio/overton-sdk
|
|
6
|
+
Requires-Python: >=3.11
|
|
7
|
+
Description-Content-Type: text/markdown
|
|
8
|
+
Requires-Dist: httpx>=0.27
|
|
9
|
+
Provides-Extra: local
|
|
10
|
+
Requires-Dist: overton-core>=0.1.0; extra == "local"
|
|
11
|
+
Provides-Extra: dev
|
|
12
|
+
Requires-Dist: pytest; extra == "dev"
|
|
13
|
+
Requires-Dist: overton-core>=0.1.0; extra == "dev"
|
|
14
|
+
Provides-Extra: integration
|
|
15
|
+
Requires-Dist: overton-core[all-providers]>=0.1.0; extra == "integration"
|
|
16
|
+
|
|
17
|
+
# Overton SDK
|
|
18
|
+
|
|
19
|
+
Overton is a probabilistic knowledge graph that resolves entities and relations from
|
|
20
|
+
unstructured text. You write raw evidence in natural language; Overton handles LLM-based
|
|
21
|
+
extraction, entity deduplication, and uncertainty quantification.
|
|
22
|
+
|
|
23
|
+
## Prerequisites
|
|
24
|
+
|
|
25
|
+
You need two things from Overton:
|
|
26
|
+
|
|
27
|
+
- An **API token** (`ovtn_...`)
|
|
28
|
+
- Your **hostname** (e.g. `api.overton.bio`)
|
|
29
|
+
|
|
30
|
+
Overton is a hosted platform — there's nothing to run or operate on your side.
|
|
31
|
+
[Contact us](mailto:hello@overton.bio) to get a token.
|
|
32
|
+
|
|
33
|
+
## Installation
|
|
34
|
+
|
|
35
|
+
```bash
|
|
36
|
+
pip install overton-sdk
|
|
37
|
+
```
|
|
38
|
+
|
|
39
|
+
Requires Python 3.11+. The only dependency is `httpx`.
|
|
40
|
+
|
|
41
|
+
## Authorization and client initialization
|
|
42
|
+
|
|
43
|
+
Overton authenticates requests using the bearer token (`ovtn_...`) that was issued to you.
|
|
44
|
+
Construct an auth object and pass it to the client, along with your hostname:
|
|
45
|
+
|
|
46
|
+
```python
|
|
47
|
+
import os
|
|
48
|
+
import overton_sdk
|
|
49
|
+
|
|
50
|
+
client = overton_sdk.OvertonClient(
|
|
51
|
+
auth=overton_sdk.UserTokenAuth(os.environ["OVERTON_TOKEN"]),
|
|
52
|
+
hostname="overton.example.com", # bare host; scheme defaults to https
|
|
53
|
+
)
|
|
54
|
+
```
|
|
55
|
+
|
|
56
|
+
### Environment variables
|
|
57
|
+
|
|
58
|
+
If `OVERTON_TOKEN` and `OVERTON_HOSTNAME` are set, the client picks them up with no
|
|
59
|
+
arguments:
|
|
60
|
+
|
|
61
|
+
```bash
|
|
62
|
+
export OVERTON_TOKEN=ovtn_...
|
|
63
|
+
export OVERTON_HOSTNAME=overton.example.com
|
|
64
|
+
```
|
|
65
|
+
|
|
66
|
+
```python
|
|
67
|
+
client = overton_sdk.OvertonClient()
|
|
68
|
+
```
|
|
69
|
+
|
|
70
|
+
### Configuration
|
|
71
|
+
|
|
72
|
+
Tune transport behavior with a `Config` object:
|
|
73
|
+
|
|
74
|
+
```python
|
|
75
|
+
from overton_sdk import Config
|
|
76
|
+
|
|
77
|
+
client = overton_sdk.OvertonClient(
|
|
78
|
+
auth=overton_sdk.UserTokenAuth(token),
|
|
79
|
+
hostname="overton.example.com",
|
|
80
|
+
config=Config(timeout=60.0, max_retries=3, verify=True),
|
|
81
|
+
)
|
|
82
|
+
```
|
|
83
|
+
|
|
84
|
+
Use the client as a context manager to ensure connections are closed:
|
|
85
|
+
|
|
86
|
+
```python
|
|
87
|
+
with overton_sdk.OvertonClient() as client:
|
|
88
|
+
m = client.manifold("my-manifold")
|
|
89
|
+
...
|
|
90
|
+
```
|
|
91
|
+
|
|
92
|
+
> [!TIP]
|
|
93
|
+
> For local development you can point at a server over plain HTTP by passing a full
|
|
94
|
+
> URL as the hostname, e.g. `hostname="http://localhost:8000"` — an explicit scheme is respected.
|
|
95
|
+
|
|
96
|
+
## Core concepts
|
|
97
|
+
|
|
98
|
+
| Concept | What it is |
|
|
99
|
+
|---|---|
|
|
100
|
+
| **Manifold** | A named knowledge graph. Create as many as you need — one per domain, project, or dataset. |
|
|
101
|
+
| **Evidence** | A piece of text you write in. Overton extracts entities and relations from it using an LLM. |
|
|
102
|
+
| **Contributor ID** | What `write()` returns — a list of IDs, one per entity or relation mention extracted from the evidence you just wrote. Use them for raw reads and curation. |
|
|
103
|
+
| **Entity ID** | The ID of a resolved real-world entity. Different from a contributor ID. Get entity IDs from `query()`. |
|
|
104
|
+
| **Covariate** | Structured metadata on evidence (`{"quarter": "Q1-2024", "sentiment": "positive"}`). Overton uses it to build conditional distributions. |
|
|
105
|
+
|
|
106
|
+
## Quickstart
|
|
107
|
+
|
|
108
|
+
```python
|
|
109
|
+
import os
|
|
110
|
+
import overton_sdk
|
|
111
|
+
|
|
112
|
+
client = overton_sdk.OvertonClient(
|
|
113
|
+
auth=overton_sdk.UserTokenAuth(os.environ["OVERTON_TOKEN"]),
|
|
114
|
+
hostname="overton.example.com",
|
|
115
|
+
)
|
|
116
|
+
m = client.manifold("earnings")
|
|
117
|
+
|
|
118
|
+
# Write evidence
|
|
119
|
+
ids = m.write("Apple reported record profits of $94B in Q1 2024, driven by iPhone sales")
|
|
120
|
+
|
|
121
|
+
# Query resolved entities
|
|
122
|
+
results = m.query("tech company earnings", top_k=5)
|
|
123
|
+
for entity in results:
|
|
124
|
+
print(entity.entity_id, entity.point_estimate(), entity.type_distribution)
|
|
125
|
+
|
|
126
|
+
# Manifold statistics
|
|
127
|
+
stats = m.measures()
|
|
128
|
+
print(f"{stats.entity_count} entities, entropy={stats.aggregate_entropy:.3f}")
|
|
129
|
+
```
|
|
130
|
+
|
|
131
|
+
## Writing evidence
|
|
132
|
+
|
|
133
|
+
```python
|
|
134
|
+
# Single string
|
|
135
|
+
m.write("Google acquired DeepMind for £400M in 2014")
|
|
136
|
+
|
|
137
|
+
# Multiple items in one call
|
|
138
|
+
m.write(
|
|
139
|
+
"Apple reported record Q1 profits",
|
|
140
|
+
"Microsoft Azure revenue grew 29% year over year",
|
|
141
|
+
)
|
|
142
|
+
|
|
143
|
+
# With source and covariates
|
|
144
|
+
m.write({
|
|
145
|
+
"text": "Apple reported record Q1 profits",
|
|
146
|
+
"source": "reuters",
|
|
147
|
+
"covariates": {"quarter": "Q1-2024", "sentiment": "positive"},
|
|
148
|
+
})
|
|
149
|
+
|
|
150
|
+
# Mix of strings and dicts
|
|
151
|
+
m.write(
|
|
152
|
+
"Apple reported record profits",
|
|
153
|
+
{"text": "Google missed analyst estimates", "covariates": {"sentiment": "negative"}},
|
|
154
|
+
)
|
|
155
|
+
```
|
|
156
|
+
|
|
157
|
+
`write()` returns a list of contributor IDs — one per entity or relation mention
|
|
158
|
+
extracted from your text by the LLM.
|
|
159
|
+
|
|
160
|
+
## Reading
|
|
161
|
+
|
|
162
|
+
```python
|
|
163
|
+
# Raw read — get back the original text using a contributor ID
|
|
164
|
+
text = m.read(contributor_id, raw=True)
|
|
165
|
+
|
|
166
|
+
# Resolved read — get the entity distribution using an entity ID
|
|
167
|
+
# Note: entity IDs come from query(), not from write()
|
|
168
|
+
entity = m.read(entity_id)
|
|
169
|
+
print(entity.type_distribution) # {"ORG": 0.91, "PERSON": 0.09}
|
|
170
|
+
print(entity.point_estimate()) # "ORG"
|
|
171
|
+
print(entity.contributor_weights) # which evidence pieces contributed and how much
|
|
172
|
+
|
|
173
|
+
# Read a relation
|
|
174
|
+
relation = m.read_relation(relation_id)
|
|
175
|
+
print(relation.type_distribution)
|
|
176
|
+
print(relation.point_estimate({"cell_line": "HeLa"})) # conditional on a covariate
|
|
177
|
+
```
|
|
178
|
+
|
|
179
|
+
## Querying
|
|
180
|
+
|
|
181
|
+
```python
|
|
182
|
+
results = m.query("pharmaceutical company", top_k=10)
|
|
183
|
+
|
|
184
|
+
for entity in results:
|
|
185
|
+
print(entity.entity_id)
|
|
186
|
+
print(entity.point_estimate()) # most likely type
|
|
187
|
+
print(entity.type_distribution) # full probability distribution
|
|
188
|
+
print(entity.alias_ids) # other IDs this entity was known by
|
|
189
|
+
```
|
|
190
|
+
|
|
191
|
+
Results are `EntityDistribution` objects ranked by semantic similarity.
|
|
192
|
+
|
|
193
|
+
## Measures
|
|
194
|
+
|
|
195
|
+
```python
|
|
196
|
+
# Manifold-level
|
|
197
|
+
mm = m.measures()
|
|
198
|
+
mm.entity_count # number of resolved entities
|
|
199
|
+
mm.relation_count # number of resolved relations
|
|
200
|
+
mm.aggregate_entropy # average type uncertainty across all entities (nats)
|
|
201
|
+
mm.component_count # number of connected subgraphs
|
|
202
|
+
mm.uncertainty_map # {entity_id: entropy} for every entity
|
|
203
|
+
|
|
204
|
+
# Entity-level
|
|
205
|
+
em = m.entity_measures(entity_id)
|
|
206
|
+
em.type_entropy # uncertainty over entity type (0 = certain, ln(N) = maximally uncertain)
|
|
207
|
+
em.evidence_concentration # 1.0 = one source dominates, 0.0 = evidence spread evenly
|
|
208
|
+
em.effective_sample_size # Kish ESS — effective number of independent evidence pieces
|
|
209
|
+
|
|
210
|
+
# Relation-level
|
|
211
|
+
rm = m.relation_measures(relation_id)
|
|
212
|
+
rm.marginal_entropy # uncertainty over relation type
|
|
213
|
+
rm.marginal_perplexity # effective number of competing relation types
|
|
214
|
+
rm.is_contradictory # True if evidence strongly conflicts
|
|
215
|
+
rm.mutual_information # list[CovariateMI] — which covariates are informative
|
|
216
|
+
|
|
217
|
+
# Divergence between two entities or relations
|
|
218
|
+
div = m.divergence(entity_id_a, entity_id_b) # Jensen-Shannon (default)
|
|
219
|
+
div = m.divergence(entity_id_a, entity_id_b, metric="kl") # KL divergence
|
|
220
|
+
print(div.value) # 0.0 = identical distributions; for Jensen-Shannon, 1.0 = maximally different (KL has no upper bound)
|
|
221
|
+
```
|
|
222
|
+
|
|
223
|
+
## Curation
|
|
224
|
+
|
|
225
|
+
```python
|
|
226
|
+
# Retract — soft delete, reversible. Evidence is ignored in future resolutions.
|
|
227
|
+
event = m.retract(contributor_id)
|
|
228
|
+
print(event.reversible) # True
|
|
229
|
+
|
|
230
|
+
# Hard delete — permanent. Evidence is removed entirely.
|
|
231
|
+
event = m.hard_delete(contributor_id)
|
|
232
|
+
print(event.reversible) # False
|
|
233
|
+
|
|
234
|
+
# Merge — declare that two entity IDs refer to the same real-world entity
|
|
235
|
+
new_entity_id, events = m.merge(["entity-abc", "entity-xyz"])
|
|
236
|
+
|
|
237
|
+
# Split — declare that one entity ID actually contains multiple distinct entities.
|
|
238
|
+
# partition is a list of contributor ID groups, one group per new entity.
|
|
239
|
+
new_entity_ids, events = m.split(entity_id, partition=[
|
|
240
|
+
["contributor-1", "contributor-2"], # -> entity A
|
|
241
|
+
["contributor-3"], # -> entity B
|
|
242
|
+
])
|
|
243
|
+
|
|
244
|
+
# Full audit log
|
|
245
|
+
log = m.event_log()
|
|
246
|
+
for event in log:
|
|
247
|
+
print(event.kind, event.timestamp, event.actor, event.reversible)
|
|
248
|
+
```
|
|
249
|
+
|
|
250
|
+
## Error handling
|
|
251
|
+
|
|
252
|
+
All SDK exceptions subclass `OvertonError`, which exposes `.status_code`. Concrete
|
|
253
|
+
subclasses are keyed by HTTP status code:
|
|
254
|
+
|
|
255
|
+
```python
|
|
256
|
+
from overton_sdk import NotFoundError, UnauthorizedError, OvertonError
|
|
257
|
+
|
|
258
|
+
try:
|
|
259
|
+
entity = m.read("entity-123")
|
|
260
|
+
except NotFoundError:
|
|
261
|
+
print("entity not found")
|
|
262
|
+
except UnauthorizedError:
|
|
263
|
+
print("invalid or expired API token")
|
|
264
|
+
except OvertonError as e:
|
|
265
|
+
print(f"API error {e.status_code}: {e}")
|
|
266
|
+
```
|
|
267
|
+
|
|
268
|
+
| Exception | HTTP status | When |
|
|
269
|
+
|---|---|---|
|
|
270
|
+
| `BadRequestError` | 400 | Malformed request |
|
|
271
|
+
| `UnauthorizedError` | 401 | Missing or invalid API token |
|
|
272
|
+
| `PermissionDeniedError` | 403 | Token lacks permission for this action |
|
|
273
|
+
| `NotFoundError` | 404 | Entity, relation, or contributor not found |
|
|
274
|
+
| `UnprocessableEntityError` | 422 | Request failed validation |
|
|
275
|
+
| `InternalServerError` | 5xx | Server-side failure |
|
|
276
|
+
| `OvertonError` | any | Base class — exposes `.status_code` |
|
|
277
|
+
|
|
278
|
+
> [!NOTE]
|
|
279
|
+
> `AuthError` and `ValidationError` remain available as aliases of `UnauthorizedError`
|
|
280
|
+
> and `UnprocessableEntityError` for backwards compatibility.
|
|
281
|
+
|
|
282
|
+
## Static type analysis
|
|
283
|
+
|
|
284
|
+
The SDK ships a `py.typed` marker (PEP 561), so type checkers like mypy and pyright
|
|
285
|
+
resolve its annotations out of the box.
|
|
286
|
+
|
|
287
|
+
```python
|
|
288
|
+
from overton_sdk import (
|
|
289
|
+
OvertonClient,
|
|
290
|
+
ManifoldHandle,
|
|
291
|
+
EntityDistribution,
|
|
292
|
+
RelationDistribution,
|
|
293
|
+
ManifoldMeasures,
|
|
294
|
+
EntityMeasures,
|
|
295
|
+
RelationMeasures,
|
|
296
|
+
DivergenceResult,
|
|
297
|
+
CurationEvent,
|
|
298
|
+
)
|
|
299
|
+
|
|
300
|
+
def analyse(m: ManifoldHandle) -> list[EntityDistribution]:
|
|
301
|
+
return m.query("some query", top_k=20)
|
|
302
|
+
```
|
|
303
|
+
|
|
304
|
+
## Full example — biomedical literature
|
|
305
|
+
|
|
306
|
+
```python
|
|
307
|
+
import os
|
|
308
|
+
import overton_sdk
|
|
309
|
+
|
|
310
|
+
client = overton_sdk.OvertonClient(
|
|
311
|
+
auth=overton_sdk.UserTokenAuth(os.environ["OVERTON_TOKEN"]),
|
|
312
|
+
hostname="overton.example.com",
|
|
313
|
+
)
|
|
314
|
+
m = client.manifold("biomedical")
|
|
315
|
+
|
|
316
|
+
# Ingest papers with provenance
|
|
317
|
+
m.write(
|
|
318
|
+
{
|
|
319
|
+
"text": "BRCA1 strongly activates PARP1 in the presence of DNA damage",
|
|
320
|
+
"source": "pmid:12345678",
|
|
321
|
+
"covariates": {"cell_line": "HeLa", "year": "2021"},
|
|
322
|
+
},
|
|
323
|
+
{
|
|
324
|
+
"text": "BRCA1 was shown to inhibit PARP1 under normoxic conditions",
|
|
325
|
+
"source": "pmid:99887766",
|
|
326
|
+
"covariates": {"cell_line": "MCF7", "year": "2023"},
|
|
327
|
+
},
|
|
328
|
+
)
|
|
329
|
+
|
|
330
|
+
# Find entities related to the BRCA1-PARP1 interaction
|
|
331
|
+
results = m.query("BRCA1 PARP1 interaction", top_k=5)
|
|
332
|
+
if results:
|
|
333
|
+
entity = results[0]
|
|
334
|
+
print(f"Entity: {entity.point_estimate()} (confidence: {max(entity.type_distribution.values()):.2f})")
|
|
335
|
+
|
|
336
|
+
# Check manifold health
|
|
337
|
+
mm = m.measures()
|
|
338
|
+
print(f"Manifold: {mm.entity_count} entities, {mm.relation_count} relations")
|
|
339
|
+
print(f"Mean uncertainty: {mm.aggregate_entropy:.3f} nats")
|
|
340
|
+
```
|
|
341
|
+
|
|
342
|
+
## Version
|
|
343
|
+
|
|
344
|
+
```python
|
|
345
|
+
import overton_sdk
|
|
346
|
+
print(overton_sdk.__version__) # "0.1.0"
|
|
347
|
+
```
|
|
@@ -0,0 +1,331 @@
|
|
|
1
|
+
# Overton SDK
|
|
2
|
+
|
|
3
|
+
Overton is a probabilistic knowledge graph that resolves entities and relations from
|
|
4
|
+
unstructured text. You write raw evidence in natural language; Overton handles LLM-based
|
|
5
|
+
extraction, entity deduplication, and uncertainty quantification.
|
|
6
|
+
|
|
7
|
+
## Prerequisites
|
|
8
|
+
|
|
9
|
+
You need two things from Overton:
|
|
10
|
+
|
|
11
|
+
- An **API token** (`ovtn_...`)
|
|
12
|
+
- Your **hostname** (e.g. `api.overton.bio`)
|
|
13
|
+
|
|
14
|
+
Overton is a hosted platform — there's nothing to run or operate on your side.
|
|
15
|
+
[Contact us](mailto:hello@overton.bio) to get a token.
|
|
16
|
+
|
|
17
|
+
## Installation
|
|
18
|
+
|
|
19
|
+
```bash
|
|
20
|
+
pip install overton-sdk
|
|
21
|
+
```
|
|
22
|
+
|
|
23
|
+
Requires Python 3.11+. The only dependency is `httpx`.
|
|
24
|
+
|
|
25
|
+
## Authorization and client initialization
|
|
26
|
+
|
|
27
|
+
Overton authenticates requests using the bearer token (`ovtn_...`) that was issued to you.
|
|
28
|
+
Construct an auth object and pass it to the client, along with your hostname:
|
|
29
|
+
|
|
30
|
+
```python
|
|
31
|
+
import os
|
|
32
|
+
import overton_sdk
|
|
33
|
+
|
|
34
|
+
client = overton_sdk.OvertonClient(
|
|
35
|
+
auth=overton_sdk.UserTokenAuth(os.environ["OVERTON_TOKEN"]),
|
|
36
|
+
hostname="overton.example.com", # bare host; scheme defaults to https
|
|
37
|
+
)
|
|
38
|
+
```
|
|
39
|
+
|
|
40
|
+
### Environment variables
|
|
41
|
+
|
|
42
|
+
If `OVERTON_TOKEN` and `OVERTON_HOSTNAME` are set, the client picks them up with no
|
|
43
|
+
arguments:
|
|
44
|
+
|
|
45
|
+
```bash
|
|
46
|
+
export OVERTON_TOKEN=ovtn_...
|
|
47
|
+
export OVERTON_HOSTNAME=overton.example.com
|
|
48
|
+
```
|
|
49
|
+
|
|
50
|
+
```python
|
|
51
|
+
client = overton_sdk.OvertonClient()
|
|
52
|
+
```
|
|
53
|
+
|
|
54
|
+
### Configuration
|
|
55
|
+
|
|
56
|
+
Tune transport behavior with a `Config` object:
|
|
57
|
+
|
|
58
|
+
```python
|
|
59
|
+
from overton_sdk import Config
|
|
60
|
+
|
|
61
|
+
client = overton_sdk.OvertonClient(
|
|
62
|
+
auth=overton_sdk.UserTokenAuth(token),
|
|
63
|
+
hostname="overton.example.com",
|
|
64
|
+
config=Config(timeout=60.0, max_retries=3, verify=True),
|
|
65
|
+
)
|
|
66
|
+
```
|
|
67
|
+
|
|
68
|
+
Use the client as a context manager to ensure connections are closed:
|
|
69
|
+
|
|
70
|
+
```python
|
|
71
|
+
with overton_sdk.OvertonClient() as client:
|
|
72
|
+
m = client.manifold("my-manifold")
|
|
73
|
+
...
|
|
74
|
+
```
|
|
75
|
+
|
|
76
|
+
> [!TIP]
|
|
77
|
+
> For local development you can point at a server over plain HTTP by passing a full
|
|
78
|
+
> URL as the hostname, e.g. `hostname="http://localhost:8000"` — an explicit scheme is respected.
|
|
79
|
+
|
|
80
|
+
## Core concepts
|
|
81
|
+
|
|
82
|
+
| Concept | What it is |
|
|
83
|
+
|---|---|
|
|
84
|
+
| **Manifold** | A named knowledge graph. Create as many as you need — one per domain, project, or dataset. |
|
|
85
|
+
| **Evidence** | A piece of text you write in. Overton extracts entities and relations from it using an LLM. |
|
|
86
|
+
| **Contributor ID** | What `write()` returns — a list of IDs, one per entity or relation mention extracted from the evidence you just wrote. Use them for raw reads and curation. |
|
|
87
|
+
| **Entity ID** | The ID of a resolved real-world entity. Different from a contributor ID. Get entity IDs from `query()`. |
|
|
88
|
+
| **Covariate** | Structured metadata on evidence (`{"quarter": "Q1-2024", "sentiment": "positive"}`). Overton uses it to build conditional distributions. |
|
|
89
|
+
|
|
90
|
+
## Quickstart
|
|
91
|
+
|
|
92
|
+
```python
|
|
93
|
+
import os
|
|
94
|
+
import overton_sdk
|
|
95
|
+
|
|
96
|
+
client = overton_sdk.OvertonClient(
|
|
97
|
+
auth=overton_sdk.UserTokenAuth(os.environ["OVERTON_TOKEN"]),
|
|
98
|
+
hostname="overton.example.com",
|
|
99
|
+
)
|
|
100
|
+
m = client.manifold("earnings")
|
|
101
|
+
|
|
102
|
+
# Write evidence
|
|
103
|
+
ids = m.write("Apple reported record profits of $94B in Q1 2024, driven by iPhone sales")
|
|
104
|
+
|
|
105
|
+
# Query resolved entities
|
|
106
|
+
results = m.query("tech company earnings", top_k=5)
|
|
107
|
+
for entity in results:
|
|
108
|
+
print(entity.entity_id, entity.point_estimate(), entity.type_distribution)
|
|
109
|
+
|
|
110
|
+
# Manifold statistics
|
|
111
|
+
stats = m.measures()
|
|
112
|
+
print(f"{stats.entity_count} entities, entropy={stats.aggregate_entropy:.3f}")
|
|
113
|
+
```
|
|
114
|
+
|
|
115
|
+
## Writing evidence
|
|
116
|
+
|
|
117
|
+
```python
|
|
118
|
+
# Single string
|
|
119
|
+
m.write("Google acquired DeepMind for £400M in 2014")
|
|
120
|
+
|
|
121
|
+
# Multiple items in one call
|
|
122
|
+
m.write(
|
|
123
|
+
"Apple reported record Q1 profits",
|
|
124
|
+
"Microsoft Azure revenue grew 29% year over year",
|
|
125
|
+
)
|
|
126
|
+
|
|
127
|
+
# With source and covariates
|
|
128
|
+
m.write({
|
|
129
|
+
"text": "Apple reported record Q1 profits",
|
|
130
|
+
"source": "reuters",
|
|
131
|
+
"covariates": {"quarter": "Q1-2024", "sentiment": "positive"},
|
|
132
|
+
})
|
|
133
|
+
|
|
134
|
+
# Mix of strings and dicts
|
|
135
|
+
m.write(
|
|
136
|
+
"Apple reported record profits",
|
|
137
|
+
{"text": "Google missed analyst estimates", "covariates": {"sentiment": "negative"}},
|
|
138
|
+
)
|
|
139
|
+
```
|
|
140
|
+
|
|
141
|
+
`write()` returns a list of contributor IDs — one per entity or relation mention
|
|
142
|
+
extracted from your text by the LLM.
|
|
143
|
+
|
|
144
|
+
## Reading
|
|
145
|
+
|
|
146
|
+
```python
|
|
147
|
+
# Raw read — get back the original text using a contributor ID
|
|
148
|
+
text = m.read(contributor_id, raw=True)
|
|
149
|
+
|
|
150
|
+
# Resolved read — get the entity distribution using an entity ID
|
|
151
|
+
# Note: entity IDs come from query(), not from write()
|
|
152
|
+
entity = m.read(entity_id)
|
|
153
|
+
print(entity.type_distribution) # {"ORG": 0.91, "PERSON": 0.09}
|
|
154
|
+
print(entity.point_estimate()) # "ORG"
|
|
155
|
+
print(entity.contributor_weights) # which evidence pieces contributed and how much
|
|
156
|
+
|
|
157
|
+
# Read a relation
|
|
158
|
+
relation = m.read_relation(relation_id)
|
|
159
|
+
print(relation.type_distribution)
|
|
160
|
+
print(relation.point_estimate({"cell_line": "HeLa"})) # conditional on a covariate
|
|
161
|
+
```
|
|
162
|
+
|
|
163
|
+
## Querying
|
|
164
|
+
|
|
165
|
+
```python
|
|
166
|
+
results = m.query("pharmaceutical company", top_k=10)
|
|
167
|
+
|
|
168
|
+
for entity in results:
|
|
169
|
+
print(entity.entity_id)
|
|
170
|
+
print(entity.point_estimate()) # most likely type
|
|
171
|
+
print(entity.type_distribution) # full probability distribution
|
|
172
|
+
print(entity.alias_ids) # other IDs this entity was known by
|
|
173
|
+
```
|
|
174
|
+
|
|
175
|
+
Results are `EntityDistribution` objects ranked by semantic similarity.
|
|
176
|
+
|
|
177
|
+
## Measures
|
|
178
|
+
|
|
179
|
+
```python
|
|
180
|
+
# Manifold-level
|
|
181
|
+
mm = m.measures()
|
|
182
|
+
mm.entity_count # number of resolved entities
|
|
183
|
+
mm.relation_count # number of resolved relations
|
|
184
|
+
mm.aggregate_entropy # average type uncertainty across all entities (nats)
|
|
185
|
+
mm.component_count # number of connected subgraphs
|
|
186
|
+
mm.uncertainty_map # {entity_id: entropy} for every entity
|
|
187
|
+
|
|
188
|
+
# Entity-level
|
|
189
|
+
em = m.entity_measures(entity_id)
|
|
190
|
+
em.type_entropy # uncertainty over entity type (0 = certain, ln(N) = maximally uncertain)
|
|
191
|
+
em.evidence_concentration # 1.0 = one source dominates, 0.0 = evidence spread evenly
|
|
192
|
+
em.effective_sample_size # Kish ESS — effective number of independent evidence pieces
|
|
193
|
+
|
|
194
|
+
# Relation-level
|
|
195
|
+
rm = m.relation_measures(relation_id)
|
|
196
|
+
rm.marginal_entropy # uncertainty over relation type
|
|
197
|
+
rm.marginal_perplexity # effective number of competing relation types
|
|
198
|
+
rm.is_contradictory # True if evidence strongly conflicts
|
|
199
|
+
rm.mutual_information # list[CovariateMI] — which covariates are informative
|
|
200
|
+
|
|
201
|
+
# Divergence between two entities or relations
|
|
202
|
+
div = m.divergence(entity_id_a, entity_id_b) # Jensen-Shannon (default)
|
|
203
|
+
div = m.divergence(entity_id_a, entity_id_b, metric="kl") # KL divergence
|
|
204
|
+
print(div.value) # 0.0 = identical distributions; for Jensen-Shannon, 1.0 = maximally different (KL has no upper bound)
|
|
205
|
+
```
|
|
206
|
+
|
|
207
|
+
## Curation
|
|
208
|
+
|
|
209
|
+
```python
|
|
210
|
+
# Retract — soft delete, reversible. Evidence is ignored in future resolutions.
|
|
211
|
+
event = m.retract(contributor_id)
|
|
212
|
+
print(event.reversible) # True
|
|
213
|
+
|
|
214
|
+
# Hard delete — permanent. Evidence is removed entirely.
|
|
215
|
+
event = m.hard_delete(contributor_id)
|
|
216
|
+
print(event.reversible) # False
|
|
217
|
+
|
|
218
|
+
# Merge — declare that two entity IDs refer to the same real-world entity
|
|
219
|
+
new_entity_id, events = m.merge(["entity-abc", "entity-xyz"])
|
|
220
|
+
|
|
221
|
+
# Split — declare that one entity ID actually contains multiple distinct entities.
|
|
222
|
+
# partition is a list of contributor ID groups, one group per new entity.
|
|
223
|
+
new_entity_ids, events = m.split(entity_id, partition=[
|
|
224
|
+
["contributor-1", "contributor-2"], # -> entity A
|
|
225
|
+
["contributor-3"], # -> entity B
|
|
226
|
+
])
|
|
227
|
+
|
|
228
|
+
# Full audit log
|
|
229
|
+
log = m.event_log()
|
|
230
|
+
for event in log:
|
|
231
|
+
print(event.kind, event.timestamp, event.actor, event.reversible)
|
|
232
|
+
```
|
|
233
|
+
|
|
234
|
+
## Error handling
|
|
235
|
+
|
|
236
|
+
All SDK exceptions subclass `OvertonError`, which exposes `.status_code`. Concrete
|
|
237
|
+
subclasses are keyed by HTTP status code:
|
|
238
|
+
|
|
239
|
+
```python
|
|
240
|
+
from overton_sdk import NotFoundError, UnauthorizedError, OvertonError
|
|
241
|
+
|
|
242
|
+
try:
|
|
243
|
+
entity = m.read("entity-123")
|
|
244
|
+
except NotFoundError:
|
|
245
|
+
print("entity not found")
|
|
246
|
+
except UnauthorizedError:
|
|
247
|
+
print("invalid or expired API token")
|
|
248
|
+
except OvertonError as e:
|
|
249
|
+
print(f"API error {e.status_code}: {e}")
|
|
250
|
+
```
|
|
251
|
+
|
|
252
|
+
| Exception | HTTP status | When |
|
|
253
|
+
|---|---|---|
|
|
254
|
+
| `BadRequestError` | 400 | Malformed request |
|
|
255
|
+
| `UnauthorizedError` | 401 | Missing or invalid API token |
|
|
256
|
+
| `PermissionDeniedError` | 403 | Token lacks permission for this action |
|
|
257
|
+
| `NotFoundError` | 404 | Entity, relation, or contributor not found |
|
|
258
|
+
| `UnprocessableEntityError` | 422 | Request failed validation |
|
|
259
|
+
| `InternalServerError` | 5xx | Server-side failure |
|
|
260
|
+
| `OvertonError` | any | Base class — exposes `.status_code` |
|
|
261
|
+
|
|
262
|
+
> [!NOTE]
|
|
263
|
+
> `AuthError` and `ValidationError` remain available as aliases of `UnauthorizedError`
|
|
264
|
+
> and `UnprocessableEntityError` for backwards compatibility.
|
|
265
|
+
|
|
266
|
+
## Static type analysis
|
|
267
|
+
|
|
268
|
+
The SDK ships a `py.typed` marker (PEP 561), so type checkers like mypy and pyright
|
|
269
|
+
resolve its annotations out of the box.
|
|
270
|
+
|
|
271
|
+
```python
|
|
272
|
+
from overton_sdk import (
|
|
273
|
+
OvertonClient,
|
|
274
|
+
ManifoldHandle,
|
|
275
|
+
EntityDistribution,
|
|
276
|
+
RelationDistribution,
|
|
277
|
+
ManifoldMeasures,
|
|
278
|
+
EntityMeasures,
|
|
279
|
+
RelationMeasures,
|
|
280
|
+
DivergenceResult,
|
|
281
|
+
CurationEvent,
|
|
282
|
+
)
|
|
283
|
+
|
|
284
|
+
def analyse(m: ManifoldHandle) -> list[EntityDistribution]:
|
|
285
|
+
return m.query("some query", top_k=20)
|
|
286
|
+
```
|
|
287
|
+
|
|
288
|
+
## Full example — biomedical literature
|
|
289
|
+
|
|
290
|
+
```python
|
|
291
|
+
import os
|
|
292
|
+
import overton_sdk
|
|
293
|
+
|
|
294
|
+
client = overton_sdk.OvertonClient(
|
|
295
|
+
auth=overton_sdk.UserTokenAuth(os.environ["OVERTON_TOKEN"]),
|
|
296
|
+
hostname="overton.example.com",
|
|
297
|
+
)
|
|
298
|
+
m = client.manifold("biomedical")
|
|
299
|
+
|
|
300
|
+
# Ingest papers with provenance
|
|
301
|
+
m.write(
|
|
302
|
+
{
|
|
303
|
+
"text": "BRCA1 strongly activates PARP1 in the presence of DNA damage",
|
|
304
|
+
"source": "pmid:12345678",
|
|
305
|
+
"covariates": {"cell_line": "HeLa", "year": "2021"},
|
|
306
|
+
},
|
|
307
|
+
{
|
|
308
|
+
"text": "BRCA1 was shown to inhibit PARP1 under normoxic conditions",
|
|
309
|
+
"source": "pmid:99887766",
|
|
310
|
+
"covariates": {"cell_line": "MCF7", "year": "2023"},
|
|
311
|
+
},
|
|
312
|
+
)
|
|
313
|
+
|
|
314
|
+
# Find entities related to the BRCA1-PARP1 interaction
|
|
315
|
+
results = m.query("BRCA1 PARP1 interaction", top_k=5)
|
|
316
|
+
if results:
|
|
317
|
+
entity = results[0]
|
|
318
|
+
print(f"Entity: {entity.point_estimate()} (confidence: {max(entity.type_distribution.values()):.2f})")
|
|
319
|
+
|
|
320
|
+
# Check manifold health
|
|
321
|
+
mm = m.measures()
|
|
322
|
+
print(f"Manifold: {mm.entity_count} entities, {mm.relation_count} relations")
|
|
323
|
+
print(f"Mean uncertainty: {mm.aggregate_entropy:.3f} nats")
|
|
324
|
+
```
|
|
325
|
+
|
|
326
|
+
## Version
|
|
327
|
+
|
|
328
|
+
```python
|
|
329
|
+
import overton_sdk
|
|
330
|
+
print(overton_sdk.__version__) # "0.1.0"
|
|
331
|
+
```
|