knowledge-graph-rdbms 0.1.4__tar.gz → 0.1.6__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {knowledge_graph_rdbms-0.1.4 → knowledge_graph_rdbms-0.1.6}/.github/workflows/ci.yml +3 -3
- {knowledge_graph_rdbms-0.1.4 → knowledge_graph_rdbms-0.1.6}/CLAUDE.md +17 -1
- {knowledge_graph_rdbms-0.1.4 → knowledge_graph_rdbms-0.1.6}/PKG-INFO +176 -21
- {knowledge_graph_rdbms-0.1.4 → knowledge_graph_rdbms-0.1.6}/README.md +175 -20
- {knowledge_graph_rdbms-0.1.4 → knowledge_graph_rdbms-0.1.6}/kgrdbms/__init__.py +6 -1
- knowledge_graph_rdbms-0.1.6/kgrdbms/backbone.py +265 -0
- {knowledge_graph_rdbms-0.1.4 → knowledge_graph_rdbms-0.1.6}/kgrdbms/cli.py +255 -3
- knowledge_graph_rdbms-0.1.6/kgrdbms/federation.py +215 -0
- {knowledge_graph_rdbms-0.1.4 → knowledge_graph_rdbms-0.1.6}/kgrdbms/mcp_server.py +258 -85
- {knowledge_graph_rdbms-0.1.4 → knowledge_graph_rdbms-0.1.6}/kgrdbms/resolver.py +48 -0
- knowledge_graph_rdbms-0.1.6/kgrdbms/virtual.py +294 -0
- {knowledge_graph_rdbms-0.1.4 → knowledge_graph_rdbms-0.1.6}/pyproject.toml +1 -1
- knowledge_graph_rdbms-0.1.6/tests/test_backbone.py +69 -0
- {knowledge_graph_rdbms-0.1.4 → knowledge_graph_rdbms-0.1.6}/tests/test_cli.py +51 -0
- knowledge_graph_rdbms-0.1.6/tests/test_federation.py +84 -0
- {knowledge_graph_rdbms-0.1.4 → knowledge_graph_rdbms-0.1.6}/tests/test_mcp_server.py +52 -12
- knowledge_graph_rdbms-0.1.6/tests/test_virtual.py +124 -0
- knowledge_graph_rdbms-0.1.6/uv.lock +1984 -0
- {knowledge_graph_rdbms-0.1.4 → knowledge_graph_rdbms-0.1.6}/.claude/skills/kg-compose/SKILL.md +0 -0
- {knowledge_graph_rdbms-0.1.4 → knowledge_graph_rdbms-0.1.6}/.github/workflows/publish.yml +0 -0
- {knowledge_graph_rdbms-0.1.4 → knowledge_graph_rdbms-0.1.6}/.gitignore +0 -0
- {knowledge_graph_rdbms-0.1.4 → knowledge_graph_rdbms-0.1.6}/CODE_OF_CONDUCT.md +0 -0
- {knowledge_graph_rdbms-0.1.4 → knowledge_graph_rdbms-0.1.6}/CONTRIBUTING.md +0 -0
- {knowledge_graph_rdbms-0.1.4 → knowledge_graph_rdbms-0.1.6}/LICENSE +0 -0
- {knowledge_graph_rdbms-0.1.4 → knowledge_graph_rdbms-0.1.6}/SECURITY.md +0 -0
- {knowledge_graph_rdbms-0.1.4 → knowledge_graph_rdbms-0.1.6}/assets/crossover.png +0 -0
- {knowledge_graph_rdbms-0.1.4 → knowledge_graph_rdbms-0.1.6}/assets/read_latency.png +0 -0
- {knowledge_graph_rdbms-0.1.4 → knowledge_graph_rdbms-0.1.6}/assets/runtimes.png +0 -0
- {knowledge_graph_rdbms-0.1.4 → knowledge_graph_rdbms-0.1.6}/assets/write_throughput.png +0 -0
- {knowledge_graph_rdbms-0.1.4 → knowledge_graph_rdbms-0.1.6}/bench/README.md +0 -0
- {knowledge_graph_rdbms-0.1.4 → knowledge_graph_rdbms-0.1.6}/bench/benchmark.py +0 -0
- {knowledge_graph_rdbms-0.1.4 → knowledge_graph_rdbms-0.1.6}/bench/charts.py +0 -0
- {knowledge_graph_rdbms-0.1.4 → knowledge_graph_rdbms-0.1.6}/bench/neo4j/README.md +0 -0
- {knowledge_graph_rdbms-0.1.4 → knowledge_graph_rdbms-0.1.6}/bench/neo4j/headtohead.py +0 -0
- {knowledge_graph_rdbms-0.1.4 → knowledge_graph_rdbms-0.1.6}/bench/postgres/README.md +0 -0
- {knowledge_graph_rdbms-0.1.4 → knowledge_graph_rdbms-0.1.6}/bench/postgres/benchmark.py +0 -0
- {knowledge_graph_rdbms-0.1.4 → knowledge_graph_rdbms-0.1.6}/bench/postgres/charts.py +0 -0
- {knowledge_graph_rdbms-0.1.4 → knowledge_graph_rdbms-0.1.6}/bench/runtimes/compare.py +0 -0
- {knowledge_graph_rdbms-0.1.4 → knowledge_graph_rdbms-0.1.6}/bench/runtimes/run_bun.js +0 -0
- {knowledge_graph_rdbms-0.1.4 → knowledge_graph_rdbms-0.1.6}/bench/runtimes/run_node.mjs +0 -0
- {knowledge_graph_rdbms-0.1.4 → knowledge_graph_rdbms-0.1.6}/bench/runtimes/run_python.py +0 -0
- {knowledge_graph_rdbms-0.1.4 → knowledge_graph_rdbms-0.1.6}/kgrdbms/backends/__init__.py +0 -0
- {knowledge_graph_rdbms-0.1.4 → knowledge_graph_rdbms-0.1.6}/kgrdbms/backends/base.py +0 -0
- {knowledge_graph_rdbms-0.1.4 → knowledge_graph_rdbms-0.1.6}/kgrdbms/backends/neo4j.py +0 -0
- {knowledge_graph_rdbms-0.1.4 → knowledge_graph_rdbms-0.1.6}/kgrdbms/backends/postgres.py +0 -0
- {knowledge_graph_rdbms-0.1.4 → knowledge_graph_rdbms-0.1.6}/kgrdbms/backends/sqlite.py +0 -0
- {knowledge_graph_rdbms-0.1.4 → knowledge_graph_rdbms-0.1.6}/kgrdbms/events.py +0 -0
- {knowledge_graph_rdbms-0.1.4 → knowledge_graph_rdbms-0.1.6}/kgrdbms/graph.py +0 -0
- {knowledge_graph_rdbms-0.1.4 → knowledge_graph_rdbms-0.1.6}/kgrdbms/invariants.py +0 -0
- {knowledge_graph_rdbms-0.1.4 → knowledge_graph_rdbms-0.1.6}/kgrdbms/policy.py +0 -0
- {knowledge_graph_rdbms-0.1.4 → knowledge_graph_rdbms-0.1.6}/kgrdbms/rdf.py +0 -0
- {knowledge_graph_rdbms-0.1.4 → knowledge_graph_rdbms-0.1.6}/kgrdbms/service.py +0 -0
- {knowledge_graph_rdbms-0.1.4 → knowledge_graph_rdbms-0.1.6}/tests/test_bulk.py +0 -0
- {knowledge_graph_rdbms-0.1.4 → knowledge_graph_rdbms-0.1.6}/tests/test_events.py +0 -0
- {knowledge_graph_rdbms-0.1.4 → knowledge_graph_rdbms-0.1.6}/tests/test_graph.py +0 -0
- {knowledge_graph_rdbms-0.1.4 → knowledge_graph_rdbms-0.1.6}/tests/test_policy.py +0 -0
- {knowledge_graph_rdbms-0.1.4 → knowledge_graph_rdbms-0.1.6}/tests/test_postgres.py +0 -0
- {knowledge_graph_rdbms-0.1.4 → knowledge_graph_rdbms-0.1.6}/tests/test_rdf.py +0 -0
- {knowledge_graph_rdbms-0.1.4 → knowledge_graph_rdbms-0.1.6}/tests/test_schema.py +0 -0
|
@@ -1,14 +1,14 @@
|
|
|
1
1
|
name: CI
|
|
2
2
|
|
|
3
|
-
# Runs the test suite on every push to
|
|
3
|
+
# Runs the test suite on every push to main and every pull request.
|
|
4
4
|
# Make the `ci` job below a required status check in branch protection to
|
|
5
5
|
# gate merges on a green suite.
|
|
6
6
|
|
|
7
7
|
on:
|
|
8
8
|
push:
|
|
9
|
-
branches: [
|
|
9
|
+
branches: [main]
|
|
10
10
|
pull_request:
|
|
11
|
-
branches: [
|
|
11
|
+
branches: [main]
|
|
12
12
|
|
|
13
13
|
concurrency:
|
|
14
14
|
group: ci-${{ github.ref }}
|
|
@@ -26,6 +26,11 @@ kg schema --samples # + example ids and enum-like property
|
|
|
26
26
|
kg ontology list # the registry (the "db of dbs")
|
|
27
27
|
kg ontology create coffee --stance inferential # register a named ontology
|
|
28
28
|
kg --ontology coffee node add drink:latte --kind Drink # route to it (resolver)
|
|
29
|
+
kg fed schema # union vocabulary across ALL ontologies (multithreaded fan-out)
|
|
30
|
+
kg fed node person:ada # find an id across the federation (identity-aware)
|
|
31
|
+
kg link add coffee drink:latte ENJOYED_BY people person:ada # cross-ontology edge (backbone)
|
|
32
|
+
kg link same-as people person:ada wiki person:ada-lovelace # assert same real-world entity
|
|
33
|
+
kg prefix add person https://kg.local/person/ # CURIE prefix -> IRI (identity backbone)
|
|
29
34
|
kg --db /tmp/x.db node add a:1 --kind T # raw escape hatch: exact file, no registry
|
|
30
35
|
kg serve # run the MCP server (needs [mcp] extra)
|
|
31
36
|
```
|
|
@@ -68,6 +73,15 @@ The engine above (`graph.py` + `service.py` + the log + the gate) operates on **
|
|
|
68
73
|
- **The event log is decoupled from the backend.** `EventLog(store, projection=None)`: *store* is the SQLite that holds the log rows; *projection* is the `GraphBackend` that `compensate()`/replay apply to. They coincide for sqlite (`EventLog(graph)` — projection defaults to store, unchanged). For postgres the store is `resolver._ControlPlaneLogStore` (a `<root>/ontologies/<slug>/events.db` sidecar) and the projection is the `PostgresGraph` — so audit/replay/undo keep working with graph data in Postgres and history in SQLite. `apply_event` only calls `GraphBackend` methods, so it drives any backend. This store↔projection split is the seam to respect for *any* non-sqlite engine (neo4j next).
|
|
69
74
|
- **New failure class:** routing to a stub engine raises `NotImplementedError`. Every front door's error handling must account for it (the CLI's `main()` already maps it to `unavailable: …` / exit 1).
|
|
70
75
|
|
|
76
|
+
## Cross-ontology: federation (reads) + the backbone (links)
|
|
77
|
+
|
|
78
|
+
The control plane routes to **one** ontology per call. Two modules sit on top to work across **many** at once — and the split is deliberate: reads federate, writes go through a backbone.
|
|
79
|
+
|
|
80
|
+
- **`federation.py` — cross-ontology READS, multithreaded by default.** A federated read is a *fan-out*: open each member ontology in its own thread (own connection — SQLite/psycopg release the GIL during a query, so N ontologies read concurrently) and merge the results tagged by source. `Federation(names)` or `Federation.all()`; methods mirror the single-graph reads (`schema`, `stats`, `nodes_by_kind`, `nodes_by_label`, `node`) plus `identity()`. Each worker calls `resolver.resolve()` in its *own* thread, so no connection is ever shared across threads. `parallel=False` forces sequential for debugging. **Federation never writes** and never silently drops a member (a member that raises propagates).
|
|
81
|
+
- **`backbone.py` — cross-ontology LINKS + the prefix registry.** A leaf edge can't cross ontologies (its FK lives in one file). The backbone is where cross-ontology structure lives, and **it needs no new storage: it IS the index graph** (`<root>/index.db`) growing new kinds. A link becomes a `Ref` proxy node per endpoint (id = `<ontology>::<node_id>`, FK-satisfied inside the index) joined by an edge; the prefix registry becomes `Prefix` nodes. **Both go through the gated + logged `service` path against the index's own event log** — so cross-domain assertions (`link`, `same_as`) are audited, reversible, and replayable exactly like leaf data. `SAME_AS` is symmetric and traversed transitively by `identity_cluster()`.
|
|
82
|
+
- **Identity is opt-in per ontology.** `OntologyEntry.shared_identity` (default `False` = local): when `True`, federation treats same-CURIE nodes across such ontologies as the *same* entity and merges them in `Federation.node()`. Everyone else stays local — link explicitly via the backbone. This is the lightweight LPG answer to RDF's global-IRI identity: stay local by default, adopt shared identity surgically.
|
|
83
|
+
- **The qualified-id delimiter is `::`** (`coffee::drink:latte`), distinct from the single-colon CURIE so a node's own colons survive `unqualify()`. Don't reuse `:`.
|
|
84
|
+
|
|
71
85
|
## Layout
|
|
72
86
|
|
|
73
87
|
```
|
|
@@ -78,6 +92,8 @@ kgrdbms/
|
|
|
78
92
|
├── invariants.py # compiled-in invariants, run before policy (no-op default)
|
|
79
93
|
├── service.py # the shared gated + logged write path (all front doors use this)
|
|
80
94
|
├── resolver.py # control plane: name → (backend, events, entry); the ontology index
|
|
95
|
+
├── federation.py # cross-ontology READS: multithreaded fan-out, identity-aware merge
|
|
96
|
+
├── backbone.py # cross-ontology LINKS + prefix/IRI registry (lives in the index graph)
|
|
81
97
|
├── backends/ # pluggable data plane (engine registry)
|
|
82
98
|
│ ├── base.py # GraphBackend Protocol + _StubBackend
|
|
83
99
|
│ ├── __init__.py # registry: @backend(name), get_backend, available_backends
|
|
@@ -88,7 +104,7 @@ kgrdbms/
|
|
|
88
104
|
└── mcp_server.py # MCP server, kg_-prefixed tools, each with optional `ontology=` (optional [mcp] extra)
|
|
89
105
|
```
|
|
90
106
|
|
|
91
|
-
`graph.py` has no internal dependencies — everything else layers on top of it. Dependency direction: `graph` ← `events`/`backends` ← `resolver` ← `service`-callers (`cli`, `mcp_server`). `service.py` depends only on the `GraphBackend` surface, never a concrete engine. Public API is re-exported from `__init__.py`.
|
|
107
|
+
`graph.py` has no internal dependencies — everything else layers on top of it. Dependency direction: `graph` ← `events`/`backends` ← `resolver` ← `backbone`/`federation` ← `service`-callers (`cli`, `mcp_server`). `backbone` builds on `resolver` + `service` (the index is its store); `federation` builds on `resolver` (+ `backbone` for identity clusters). `service.py` depends only on the `GraphBackend` surface, never a concrete engine. Public API is re-exported from `__init__.py`.
|
|
92
108
|
|
|
93
109
|
## Node id convention (CURIEs)
|
|
94
110
|
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: knowledge-graph-rdbms
|
|
3
|
-
Version: 0.1.4
|
|
3
|
+
Version: 0.1.6
|
|
4
4
|
Summary: A label property graph on an RDBMS (SQLite): nodes, typed edges, an append-only event log, and an optional MCP server.
|
|
5
5
|
Project-URL: Homepage, https://github.com/cunicopia-dev/knowledge-graph-rdbms
|
|
6
6
|
Project-URL: Repository, https://github.com/cunicopia-dev/knowledge-graph-rdbms
|
|
@@ -31,11 +31,12 @@ Description-Content-Type: text/markdown
|
|
|
31
31
|
|
|
32
32
|
# knowledge-graph-rdbms
|
|
33
33
|
|
|
34
|
+

|
|
34
35
|

|
|
35
36
|

|
|
36
37
|

|
|
37
|
-

|
|
39
|
+

|
|
39
40
|

|
|
40
41
|
|
|
41
42
|
**A knowledge graph for modeling _meaning_ — entities, the kinds of things they
|
|
@@ -77,6 +78,8 @@ Small enough to hold in your head. Flexible enough to model anything.
|
|
|
77
78
|
- [The data model](#the-data-model)
|
|
78
79
|
- [Architecture: three front doors, one engine](#architecture-three-front-doors-one-engine)
|
|
79
80
|
- [Many ontologies: one control plane](#many-ontologies-one-control-plane)
|
|
81
|
+
- [Discovery: read the schema before you query](#discovery-read-the-schema-before-you-query)
|
|
82
|
+
- [Cross-ontology: federation and the backbone](#cross-ontology-federation-and-the-backbone)
|
|
80
83
|
- [Event sourcing: the graph is a projection](#event-sourcing-the-graph-is-a-projection)
|
|
81
84
|
- [The safety gate: invariants vs. policy](#the-safety-gate-invariants-vs-policy)
|
|
82
85
|
- [Install](#install)
|
|
@@ -358,6 +361,94 @@ NAME` routes through the resolver (named, registered, multi-engine), while
|
|
|
358
361
|
|
|
359
362
|
---
|
|
360
363
|
|
|
364
|
+
## Discovery: read the schema before you query
|
|
365
|
+
|
|
366
|
+
A graph you didn't build is opaque: which `kind`s exist? which edge types? what
|
|
367
|
+
property keys live on a `Person`? `schema()` answers all of it in **one read** —
|
|
368
|
+
the map to read before querying, rather than guessing with trial calls.
|
|
369
|
+
|
|
370
|
+
```bash
|
|
371
|
+
kg schema # kinds, edge types, labels, and property keys per kind — with counts
|
|
372
|
+
kg schema --samples # + a few example ids per kind and the enum-like values a key takes
|
|
373
|
+
```
|
|
374
|
+
|
|
375
|
+
What comes back is the *observed* vocabulary — a profile of what's actually in
|
|
376
|
+
the graph, not an enforced schema (the graph stays schemaless). The MCP tool
|
|
377
|
+
`kg_schema` carries an instruction to call it **first**, so an agent reads the map
|
|
378
|
+
before it moves. It's a plain read — pure `GROUP BY` aggregates, no gate — and
|
|
379
|
+
like every read it has a federated form (next) that unions the vocabulary across
|
|
380
|
+
many ontologies at once.
|
|
381
|
+
|
|
382
|
+
---
|
|
383
|
+
|
|
384
|
+
## Cross-ontology: federation and the backbone
|
|
385
|
+
|
|
386
|
+
The control plane routes to *one* ontology per call. Two layers sit on top to work
|
|
387
|
+
across *many* at once: **reads federate, writes go through a backbone.**
|
|
388
|
+
|
|
389
|
+
```mermaid
|
|
390
|
+
flowchart TD
|
|
391
|
+
Q["kg fed node person:ada"] --> FED["federation.py<br/>multithreaded fan-out"]
|
|
392
|
+
FED -.->|own thread + connection| O1[("people")]
|
|
393
|
+
FED -.->|own thread + connection| O2[("papers")]
|
|
394
|
+
FED -.->|own thread + connection| O3[("coffee")]
|
|
395
|
+
O1 --> M["merge, tagged by source<br/>(identity-aware)"]
|
|
396
|
+
O2 --> M
|
|
397
|
+
O3 --> M
|
|
398
|
+
|
|
399
|
+
L["kg link same-as …"] --> BB["backbone.py"]
|
|
400
|
+
BB --> IDX[["index.db — the backbone<br/>Ref + Prefix nodes, SAME_AS edges<br/>gated + logged"]]
|
|
401
|
+
```
|
|
402
|
+
|
|
403
|
+
### Federation — cross-ontology reads, multithreaded
|
|
404
|
+
|
|
405
|
+
A federated read is a *fan-out*: open each member ontology in its own thread (its
|
|
406
|
+
own connection — SQLite releases the GIL during a query, so N ontologies read
|
|
407
|
+
**concurrently**) and merge the results tagged by source. There's no separate
|
|
408
|
+
"federated" API: every base read takes an optional `ontologies=[...]` scope.
|
|
409
|
+
|
|
410
|
+
```python
|
|
411
|
+
from kgrdbms import Federation
|
|
412
|
+
|
|
413
|
+
fed = Federation(["people", "papers", "coffee"]) # or Federation.all()
|
|
414
|
+
fed.schema() # unioned vocabulary + a per-ontology breakdown
|
|
415
|
+
fed.nodes_by_kind("Person") # [Located(ontology, node), ...] — tagged by source
|
|
416
|
+
fed.node("person:ada") # every occurrence across worlds, identity-merged
|
|
417
|
+
```
|
|
418
|
+
|
|
419
|
+
```bash
|
|
420
|
+
kg fed schema # union vocabulary across ALL ontologies
|
|
421
|
+
kg fed node person:ada # find an id across the federation (identity-aware)
|
|
422
|
+
```
|
|
423
|
+
|
|
424
|
+
Federation never writes and never silently drops a member (a member that raises
|
|
425
|
+
propagates); `parallel=False` forces sequential execution for debugging.
|
|
426
|
+
|
|
427
|
+
### The backbone — cross-ontology links
|
|
428
|
+
|
|
429
|
+
A leaf edge can't cross ontologies: its foreign key lives in one file. The backbone
|
|
430
|
+
is where cross-ontology structure lives, and it needs **no new storage — it *is* the
|
|
431
|
+
index graph** growing new kinds. A link becomes a lightweight `Ref` proxy node per
|
|
432
|
+
endpoint (id `<ontology>::<node_id>`, FK-satisfied *inside* the index) joined by an
|
|
433
|
+
edge; the prefix registry becomes `Prefix` nodes. Both go through the **same gated +
|
|
434
|
+
logged `service` path**, so a cross-domain assertion is audited, reversible, and
|
|
435
|
+
replayable exactly like leaf data.
|
|
436
|
+
|
|
437
|
+
```bash
|
|
438
|
+
kg link add coffee drink:latte ENJOYED_BY people person:ada # a typed cross-ontology edge
|
|
439
|
+
kg link same-as people person:ada wiki person:ada-lovelace # "same real-world entity" (symmetric)
|
|
440
|
+
kg link cluster people person:ada # the transitive SAME_AS cluster
|
|
441
|
+
kg prefix add person https://kg.local/person/ # CURIE prefix -> IRI (identity backbone)
|
|
442
|
+
```
|
|
443
|
+
|
|
444
|
+
**Identity is opt-in per ontology.** By default identity is *local* — `person:ada`
|
|
445
|
+
in two ontologies are different nodes until you link them via the backbone. An
|
|
446
|
+
ontology created with `--shared-identity` opts into *global* identity, and federation
|
|
447
|
+
then treats same-CURIE nodes across such ontologies as the **same** entity and merges
|
|
448
|
+
them.
|
|
449
|
+
|
|
450
|
+
---
|
|
451
|
+
|
|
361
452
|
## Event sourcing: the graph is a projection
|
|
362
453
|
|
|
363
454
|
The graph you query is a cache. The **append-only event log is the source of
|
|
@@ -425,6 +516,55 @@ replay(graph, events, genesis=genesis, upto_ts=ts) # ...as of an instant
|
|
|
425
516
|
|
|
426
517
|
---
|
|
427
518
|
|
|
519
|
+
## Virtual edges: relationships you don't store
|
|
520
|
+
|
|
521
|
+
Event sourcing is the right model for *curated facts* — but the wrong one for a
|
|
522
|
+
high-cardinality, machine-generated relationship layer that already lives, fresh
|
|
523
|
+
and authoritative, in some operational store. Mirroring 100k correlation edges
|
|
524
|
+
into the graph (and the log) every night is wasteful and instantly stale.
|
|
525
|
+
|
|
526
|
+
A **virtual edge** inverts that. The ontology stores only a *binding* — an edge
|
|
527
|
+
TYPE plus the SQL that resolves its instances against an external source. At
|
|
528
|
+
traversal time the resolver runs that query, parameterized by the node you're
|
|
529
|
+
standing on, and synthesizes the edges live. Zero copy, always current, one
|
|
530
|
+
source of truth — Ontology-Based Data Access in the graph's own terms.
|
|
531
|
+
|
|
532
|
+
```python
|
|
533
|
+
# Bind CO_HELD_WITH to a query over the operational store (here, any DB-API source)
|
|
534
|
+
kg_virtual_edge_add(
|
|
535
|
+
edge_type="CO_HELD_WITH",
|
|
536
|
+
query="SELECT b AS to_id, shared FROM co_held WHERE a = ?", # '?' sqlite, '%s' postgres
|
|
537
|
+
dsn_env="OUROBOROS_DSN", # credentials by reference — never in the graph
|
|
538
|
+
source="id_slug", # company:NVDA -> bind "NVDA"
|
|
539
|
+
target_id_template="company:{value}",
|
|
540
|
+
prop_cols=["shared"], directions="both", ontology="market",
|
|
541
|
+
)
|
|
542
|
+
|
|
543
|
+
# now ordinary traversal unions stored + virtual edges; virtual ones carry _virtual:true
|
|
544
|
+
kg_edges("company:NVDA", direction="out", ontology="market")
|
|
545
|
+
# -> [{to: "company:AMD", type: "CO_HELD_WITH", properties: {shared: 12, _virtual: true}, …}]
|
|
546
|
+
```
|
|
547
|
+
|
|
548
|
+
Two properties keep it safe and simple:
|
|
549
|
+
|
|
550
|
+
- **Read-only.** Virtual edges are never written, so they sidestep the whole
|
|
551
|
+
gated-write / event-log / compensation machinery. A binding is config; the
|
|
552
|
+
edges are a view. Nothing to invalidate, replay, or undo.
|
|
553
|
+
- **Parameterized, never interpolated.** The SQL template is operator-authored;
|
|
554
|
+
the per-node value is always *bound* through the driver, never formatted into
|
|
555
|
+
the string. Credentials ride `dsn_env` (an env-var name), so secrets stay out
|
|
556
|
+
of the graph and out of version control.
|
|
557
|
+
|
|
558
|
+
Bindings live as reserved-kind (`_VirtualEdge`) nodes *in the ontology*, so they
|
|
559
|
+
travel with it and version alongside the schema. This is the seam for a
|
|
560
|
+
schema-graph / data-graph split: keep the curated **schema** (types, contracts,
|
|
561
|
+
doctrine) in a portable SQLite ontology, and **virtualize the populated extension**
|
|
562
|
+
straight out of your system-of-record — no ETL, no drift.
|
|
563
|
+
|
|
564
|
+
Tools: `kg_virtual_edge_add`, `kg_virtual_edges_list`, `kg_virtual_edge_remove`.
|
|
565
|
+
|
|
566
|
+
---
|
|
567
|
+
|
|
428
568
|
## The safety gate: invariants vs. policy
|
|
429
569
|
|
|
430
570
|
When you expose the graph for live mutation — especially to an AI agent over
|
|
@@ -572,18 +712,20 @@ Or hand-edit a client config (e.g. Claude Desktop):
|
|
|
572
712
|
{ "mcpServers": { "kgrdbms": { "command": "kgrdbms-mcp" } } }
|
|
573
713
|
```
|
|
574
714
|
|
|
575
|
-
It exposes `kg_`-prefixed tools
|
|
576
|
-
be called first
|
|
577
|
-
`kg_neighborhood`, `kg_shortest_path`, `kg_descendants
|
|
578
|
-
|
|
579
|
-
|
|
580
|
-
|
|
581
|
-
|
|
582
|
-
(`
|
|
583
|
-
|
|
584
|
-
|
|
585
|
-
|
|
586
|
-
|
|
715
|
+
It exposes `kg_`-prefixed tools over one engine. Reads — `kg_schema` (the
|
|
716
|
+
vocabulary, meant to be called first), `kg_node_get`, `kg_find` (by kind and/or
|
|
717
|
+
label), `kg_edges`, `kg_neighborhood`, `kg_shortest_path`, `kg_descendants` — each
|
|
718
|
+
takes an optional `ontologies=[...]` to fan out across many ontologies in one
|
|
719
|
+
call. Then gated writes (`kg_node_upsert`, `kg_edge_add`, `kg_edge_remove`,
|
|
720
|
+
`kg_node_delete`), bulk composition (`kg_import` — a whole `{nodes, edges}` batch in
|
|
721
|
+
one call, so an agent populates an ontology in a single tool call instead of dozens),
|
|
722
|
+
the cross-ontology backbone (`kg_link`, `kg_links_of`, `kg_identity`, `kg_prefix_add`,
|
|
723
|
+
`kg_prefix_resolve`), RDF interop (`kg_rdf_export`, `kg_rdf_import` — see below), and
|
|
724
|
+
the event log (`kg_events_tail`, `kg_event_revert`, `kg_replay`). Every write passes
|
|
725
|
+
the invariants + policy gate and is recorded — same engine, same file as the CLI.
|
|
726
|
+
Every tool takes an optional `ontology` name, and `kg_ontologies_list` /
|
|
727
|
+
`kg_ontology_create` / `kg_ontology_delete` manage the registry — so an agent can
|
|
728
|
+
discover, create, route between, and delete ontologies entirely over MCP.
|
|
587
729
|
|
|
588
730
|
---
|
|
589
731
|
|
|
@@ -604,7 +746,7 @@ that data by `bench/charts.py`. Run both on your own machine in one command.
|
|
|
604
746
|
|
|
605
747
|
### Writes — the batching lever
|
|
606
748
|
|
|
607
|
-

|
|
608
750
|
|
|
609
751
|
Each single write commits on its own for durability. Wrapping a bulk load in
|
|
610
752
|
`batch()` / `add_nodes` / `add_edges` collapses those per-call commits into one
|
|
@@ -615,7 +757,7 @@ thousands per second.
|
|
|
615
757
|
|
|
616
758
|
### Reads — fast, with an honest tail
|
|
617
759
|
|
|
618
|
-

|
|
619
761
|
|
|
620
762
|
Point lookups land in single-digit microseconds, and multi-node reads hydrate
|
|
621
763
|
the whole result set in a constant number of queries (no N+1 fan-out). The chart
|
|
@@ -629,7 +771,7 @@ SQLite engine runs under CPython, Node, and Bun, so the gap between them is pure
|
|
|
629
771
|
binding overhead — under 2×, and it doesn't even favor one runtime across
|
|
630
772
|
operations.
|
|
631
773
|
|
|
632
|
-

|
|
633
775
|
|
|
634
776
|
The lever that actually moved the needle was transaction batching (~10×, above),
|
|
635
777
|
not the language. Reproduce it with `python bench/runtimes/compare.py`.
|
|
@@ -639,7 +781,7 @@ not the language. Reproduce it with `python bench/runtimes/compare.py`.
|
|
|
639
781
|
We measured it against Neo4j — same graph, same queries, identical methodology
|
|
640
782
|
(full harness and reproduction in [`bench/neo4j/`](bench/neo4j/README.md)):
|
|
641
783
|
|
|
642
|
-

|
|
643
785
|
|
|
644
786
|
Queries compile to SQL over B-tree indexes, so each traversal hop is an index
|
|
645
787
|
lookup — wonderfully cheap for point reads and shallow traversals. An in-process
|
|
@@ -768,7 +910,18 @@ replayable.
|
|
|
768
910
|
| `kg rdf export [--format F]` | serialize to Turtle/N-Triples (RDF-star) |
|
|
769
911
|
| `kg rdf import FILE` | load RDF back in (gated + logged) |
|
|
770
912
|
| `kg ontology list` | list registered ontologies (the registry) |
|
|
771
|
-
| `kg ontology create NAME …` | register an ontology (`--backend`, `--stance`) |
|
|
913
|
+
| `kg ontology create NAME …` | register an ontology (`--backend`, `--stance`, `--shared-identity`) |
|
|
914
|
+
| `kg ontology delete NAME [--purge]` | deregister an ontology (`--purge` also deletes its data) |
|
|
915
|
+
| `kg fed schema [--samples]` | union vocabulary across ALL ontologies (multithreaded fan-out) |
|
|
916
|
+
| `kg fed stats` | node/edge totals across the federation |
|
|
917
|
+
| `kg fed nodes-by-kind KIND` / `nodes-by-label LABEL` | nodes across ontologies, tagged by source |
|
|
918
|
+
| `kg fed node ID` | find an id across the federation (identity-aware) |
|
|
919
|
+
| `kg link add FROM_ONT FROM TYPE TO_ONT TO` | cross-ontology edge (the backbone) |
|
|
920
|
+
| `kg link same-as ONT_A ID_A ONT_B ID_B` | assert two nodes are the same entity (symmetric) |
|
|
921
|
+
| `kg link of ONT ID` | cross-ontology links touching a node |
|
|
922
|
+
| `kg link cluster ONT ID` | the transitive SAME_AS identity cluster |
|
|
923
|
+
| `kg prefix add P IRI_BASE` | bind a CURIE prefix to an IRI base |
|
|
924
|
+
| `kg prefix expand CURIE` / `contract IRI` | CURIE ↔ IRI via the registry |
|
|
772
925
|
| `kg serve [--transport T]` | run the MCP server |
|
|
773
926
|
|
|
774
927
|
Add `--json` to any command for machine-readable output. Target a graph with
|
|
@@ -787,6 +940,8 @@ kgrdbms/
|
|
|
787
940
|
├── invariants.py # compiled-in invariants, checked before policy (no-op default)
|
|
788
941
|
├── service.py # the shared gated + logged write path
|
|
789
942
|
├── resolver.py # control plane: ontology name → (backend, events, entry) + the index
|
|
943
|
+
├── federation.py # cross-ontology reads: multithreaded fan-out, identity-aware merge
|
|
944
|
+
├── backbone.py # cross-ontology links + prefix/IRI registry (lives in the index graph)
|
|
790
945
|
├── backends/ # pluggable engine registry
|
|
791
946
|
│ ├── base.py # GraphBackend protocol + raising stub skeleton
|
|
792
947
|
│ ├── sqlite.py # live engine (adapter over Graph)
|
|
@@ -808,7 +963,7 @@ own. Everything else layers on top; `service.py` depends only on the
|
|
|
808
963
|
```bash
|
|
809
964
|
git clone <repo> && cd knowledge-graph-rdbms
|
|
810
965
|
uv venv && uv pip install -e ".[dev]"
|
|
811
|
-
pytest #
|
|
966
|
+
pytest # 107 tests
|
|
812
967
|
python bench/benchmark.py # benchmark with p50–p99 (see bench/README.md)
|
|
813
968
|
```
|
|
814
969
|
|
|
@@ -1,10 +1,11 @@
|
|
|
1
1
|
# knowledge-graph-rdbms
|
|
2
2
|
|
|
3
|
+

|
|
3
4
|

|
|
4
5
|

|
|
5
6
|

|
|
6
|
-

|
|
8
|
+

|
|
8
9
|

|
|
9
10
|
|
|
10
11
|
**A knowledge graph for modeling _meaning_ — entities, the kinds of things they
|
|
@@ -46,6 +47,8 @@ Small enough to hold in your head. Flexible enough to model anything.
|
|
|
46
47
|
- [The data model](#the-data-model)
|
|
47
48
|
- [Architecture: three front doors, one engine](#architecture-three-front-doors-one-engine)
|
|
48
49
|
- [Many ontologies: one control plane](#many-ontologies-one-control-plane)
|
|
50
|
+
- [Discovery: read the schema before you query](#discovery-read-the-schema-before-you-query)
|
|
51
|
+
- [Cross-ontology: federation and the backbone](#cross-ontology-federation-and-the-backbone)
|
|
49
52
|
- [Event sourcing: the graph is a projection](#event-sourcing-the-graph-is-a-projection)
|
|
50
53
|
- [The safety gate: invariants vs. policy](#the-safety-gate-invariants-vs-policy)
|
|
51
54
|
- [Install](#install)
|
|
@@ -327,6 +330,94 @@ NAME` routes through the resolver (named, registered, multi-engine), while
|
|
|
327
330
|
|
|
328
331
|
---
|
|
329
332
|
|
|
333
|
+
## Discovery: read the schema before you query
|
|
334
|
+
|
|
335
|
+
A graph you didn't build is opaque: which `kind`s exist? which edge types? what
|
|
336
|
+
property keys live on a `Person`? `schema()` answers all of it in **one read** —
|
|
337
|
+
the map to read before querying, rather than guessing with trial calls.
|
|
338
|
+
|
|
339
|
+
```bash
|
|
340
|
+
kg schema # kinds, edge types, labels, and property keys per kind — with counts
|
|
341
|
+
kg schema --samples # + a few example ids per kind and the enum-like values a key takes
|
|
342
|
+
```
|
|
343
|
+
|
|
344
|
+
What comes back is the *observed* vocabulary — a profile of what's actually in
|
|
345
|
+
the graph, not an enforced schema (the graph stays schemaless). The MCP tool
|
|
346
|
+
`kg_schema` carries an instruction to call it **first**, so an agent reads the map
|
|
347
|
+
before it moves. It's a plain read — pure `GROUP BY` aggregates, no gate — and
|
|
348
|
+
like every read it has a federated form (next) that unions the vocabulary across
|
|
349
|
+
many ontologies at once.
|
|
350
|
+
|
|
351
|
+
---
|
|
352
|
+
|
|
353
|
+
## Cross-ontology: federation and the backbone
|
|
354
|
+
|
|
355
|
+
The control plane routes to *one* ontology per call. Two layers sit on top to work
|
|
356
|
+
across *many* at once: **reads federate, writes go through a backbone.**
|
|
357
|
+
|
|
358
|
+
```mermaid
|
|
359
|
+
flowchart TD
|
|
360
|
+
Q["kg fed node person:ada"] --> FED["federation.py<br/>multithreaded fan-out"]
|
|
361
|
+
FED -.->|own thread + connection| O1[("people")]
|
|
362
|
+
FED -.->|own thread + connection| O2[("papers")]
|
|
363
|
+
FED -.->|own thread + connection| O3[("coffee")]
|
|
364
|
+
O1 --> M["merge, tagged by source<br/>(identity-aware)"]
|
|
365
|
+
O2 --> M
|
|
366
|
+
O3 --> M
|
|
367
|
+
|
|
368
|
+
L["kg link same-as …"] --> BB["backbone.py"]
|
|
369
|
+
BB --> IDX[["index.db — the backbone<br/>Ref + Prefix nodes, SAME_AS edges<br/>gated + logged"]]
|
|
370
|
+
```
|
|
371
|
+
|
|
372
|
+
### Federation — cross-ontology reads, multithreaded
|
|
373
|
+
|
|
374
|
+
A federated read is a *fan-out*: open each member ontology in its own thread (its
|
|
375
|
+
own connection — SQLite releases the GIL during a query, so N ontologies read
|
|
376
|
+
**concurrently**) and merge the results tagged by source. There's no separate
|
|
377
|
+
"federated" API: every base read takes an optional `ontologies=[...]` scope.
|
|
378
|
+
|
|
379
|
+
```python
|
|
380
|
+
from kgrdbms import Federation
|
|
381
|
+
|
|
382
|
+
fed = Federation(["people", "papers", "coffee"]) # or Federation.all()
|
|
383
|
+
fed.schema() # unioned vocabulary + a per-ontology breakdown
|
|
384
|
+
fed.nodes_by_kind("Person") # [Located(ontology, node), ...] — tagged by source
|
|
385
|
+
fed.node("person:ada") # every occurrence across worlds, identity-merged
|
|
386
|
+
```
|
|
387
|
+
|
|
388
|
+
```bash
|
|
389
|
+
kg fed schema # union vocabulary across ALL ontologies
|
|
390
|
+
kg fed node person:ada # find an id across the federation (identity-aware)
|
|
391
|
+
```
|
|
392
|
+
|
|
393
|
+
Federation never writes and never silently drops a member (if a member raises, the error
|
|
394
|
+
propagates); `parallel=False` forces sequential execution for debugging.
|
|
395
|
+
|
|
396
|
+
### The backbone — cross-ontology links
|
|
397
|
+
|
|
398
|
+
A leaf edge can't cross ontologies: its foreign key lives in one file. The backbone
|
|
399
|
+
is where cross-ontology structure lives, and it needs **no new storage — it *is* the
|
|
400
|
+
index graph** growing new kinds. A link becomes a lightweight `Ref` proxy node per
|
|
401
|
+
endpoint (id `<ontology>::<node_id>`, FK-satisfied *inside* the index) joined by an
|
|
402
|
+
edge; the prefix registry becomes `Prefix` nodes. Both go through the **same gated +
|
|
403
|
+
logged `service` path**, so a cross-domain assertion is audited, reversible, and
|
|
404
|
+
replayable exactly like leaf data.
|
|
405
|
+
|
|
406
|
+
```bash
|
|
407
|
+
kg link add coffee drink:latte ENJOYED_BY people person:ada # a typed cross-ontology edge
|
|
408
|
+
kg link same-as people person:ada wiki person:ada-lovelace # "same real-world entity" (symmetric)
|
|
409
|
+
kg link cluster people person:ada # the transitive SAME_AS cluster
|
|
410
|
+
kg prefix add person https://kg.local/person/ # CURIE prefix -> IRI (identity backbone)
|
|
411
|
+
```
|
|
412
|
+
|
|
413
|
+
**Identity is opt-in per ontology.** By default identity is *local* — `person:ada`
|
|
414
|
+
in two ontologies are different nodes until you link them via the backbone. An
|
|
415
|
+
ontology created with `--shared-identity` opts into *global* identity, and federation
|
|
416
|
+
then treats same-CURIE nodes across such ontologies as the **same** entity and merges
|
|
417
|
+
them.
|
|
418
|
+
|
|
419
|
+
---
|
|
420
|
+
|
|
330
421
|
## Event sourcing: the graph is a projection
|
|
331
422
|
|
|
332
423
|
The graph you query is a cache. The **append-only event log is the source of
|
|
@@ -394,6 +485,55 @@ replay(graph, events, genesis=genesis, upto_ts=ts) # ...as of an instant
|
|
|
394
485
|
|
|
395
486
|
---
|
|
396
487
|
|
|
488
|
+
## Virtual edges: relationships you don't store
|
|
489
|
+
|
|
490
|
+
Event sourcing is the right model for *curated facts* — but the wrong one for a
|
|
491
|
+
high-cardinality, machine-generated relationship layer that already lives, fresh
|
|
492
|
+
and authoritative, in some operational store. Mirroring 100k correlation edges
|
|
493
|
+
into the graph (and the log) every night is wasteful and instantly stale.
|
|
494
|
+
|
|
495
|
+
A **virtual edge** inverts that. The ontology stores only a *binding* — an edge
|
|
496
|
+
TYPE plus the SQL that resolves its instances against an external source. At
|
|
497
|
+
traversal time the resolver runs that query, parameterized by the node you're
|
|
498
|
+
standing on, and synthesizes the edges live. Zero copy, always current, one
|
|
499
|
+
source of truth — Ontology-Based Data Access in the graph's own terms.
|
|
500
|
+
|
|
501
|
+
```python
|
|
502
|
+
# Bind CO_HELD_WITH to a query over the operational store (here, any DB-API source)
|
|
503
|
+
kg_virtual_edge_add(
|
|
504
|
+
edge_type="CO_HELD_WITH",
|
|
505
|
+
query="SELECT b AS to_id, shared FROM co_held WHERE a = ?", # '?' sqlite, '%s' postgres
|
|
506
|
+
dsn_env="OUROBOROS_DSN", # credentials by reference — never in the graph
|
|
507
|
+
source="id_slug", # company:NVDA -> bind "NVDA"
|
|
508
|
+
target_id_template="company:{value}",
|
|
509
|
+
prop_cols=["shared"], directions="both", ontology="market",
|
|
510
|
+
)
|
|
511
|
+
|
|
512
|
+
# now ordinary traversal unions stored + virtual edges; virtual ones carry _virtual:true
|
|
513
|
+
kg_edges("company:NVDA", direction="out", ontology="market")
|
|
514
|
+
# -> [{to: "company:AMD", type: "CO_HELD_WITH", properties: {shared: 12, _virtual: true}, …}]
|
|
515
|
+
```
|
|
516
|
+
|
|
517
|
+
Two properties keep it safe and simple:
|
|
518
|
+
|
|
519
|
+
- **Read-only.** Virtual edges are never written, so they sidestep the whole
|
|
520
|
+
gated-write / event-log / compensation machinery. A binding is config; the
|
|
521
|
+
edges are a view. Nothing to invalidate, replay, or undo.
|
|
522
|
+
- **Parameterized, never interpolated.** The SQL template is operator-authored;
|
|
523
|
+
the per-node value is always *bound* through the driver, never formatted into
|
|
524
|
+
the string. Credentials ride `dsn_env` (an env-var name), so secrets stay out
|
|
525
|
+
of the graph and out of version control.
|
|
526
|
+
|
|
527
|
+
Bindings live as reserved-kind (`_VirtualEdge`) nodes *in the ontology*, so they
|
|
528
|
+
travel with it and version alongside the schema. This is the seam for a
|
|
529
|
+
schema-graph / data-graph split: keep the curated **schema** (types, contracts,
|
|
530
|
+
doctrine) in a portable SQLite ontology, and **virtualize the populated extension**
|
|
531
|
+
straight out of your system-of-record — no ETL, no drift.
|
|
532
|
+
|
|
533
|
+
Tools: `kg_virtual_edge_add`, `kg_virtual_edges_list`, `kg_virtual_edge_remove`.
|
|
534
|
+
|
|
535
|
+
---
|
|
536
|
+
|
|
397
537
|
## The safety gate: invariants vs. policy
|
|
398
538
|
|
|
399
539
|
When you expose the graph for live mutation — especially to an AI agent over
|
|
@@ -541,18 +681,20 @@ Or hand-edit a client config (e.g. Claude Desktop):
|
|
|
541
681
|
{ "mcpServers": { "kgrdbms": { "command": "kgrdbms-mcp" } } }
|
|
542
682
|
```
|
|
543
683
|
|
|
544
|
-
It exposes `kg_`-prefixed tools
|
|
545
|
-
be called first
|
|
546
|
-
`kg_neighborhood`, `kg_shortest_path`, `kg_descendants
|
|
547
|
-
|
|
548
|
-
|
|
549
|
-
|
|
550
|
-
|
|
551
|
-
(`
|
|
552
|
-
|
|
553
|
-
|
|
554
|
-
|
|
555
|
-
|
|
684
|
+
It exposes `kg_`-prefixed tools over one engine. Reads — `kg_schema` (the
|
|
685
|
+
vocabulary, meant to be called first), `kg_node_get`, `kg_find` (by kind and/or
|
|
686
|
+
label), `kg_edges`, `kg_neighborhood`, `kg_shortest_path`, `kg_descendants` — each
|
|
687
|
+
takes an optional `ontologies=[...]` to fan out across many ontologies in one
|
|
688
|
+
call. Then gated writes (`kg_node_upsert`, `kg_edge_add`, `kg_edge_remove`,
|
|
689
|
+
`kg_node_delete`), bulk composition (`kg_import` — a whole `{nodes, edges}` batch in
|
|
690
|
+
one call, so an agent populates an ontology in a single tool call instead of dozens),
|
|
691
|
+
the cross-ontology backbone (`kg_link`, `kg_links_of`, `kg_identity`, `kg_prefix_add`,
|
|
692
|
+
`kg_prefix_resolve`), RDF interop (`kg_rdf_export`, `kg_rdf_import` — see below), and
|
|
693
|
+
the event log (`kg_events_tail`, `kg_event_revert`, `kg_replay`). Every write passes
|
|
694
|
+
the invariants + policy gate and is recorded — same engine, same file as the CLI.
|
|
695
|
+
Every tool takes an optional `ontology` name, and `kg_ontologies_list` /
|
|
696
|
+
`kg_ontology_create` / `kg_ontology_delete` manage the registry — so an agent can
|
|
697
|
+
discover, create, route between, and delete ontologies entirely over MCP.
|
|
556
698
|
|
|
557
699
|
---
|
|
558
700
|
|
|
@@ -573,7 +715,7 @@ that data by `bench/charts.py`. Run both on your own machine in one command.
|
|
|
573
715
|
|
|
574
716
|
### Writes — the batching lever
|
|
575
717
|
|
|
576
|
-

|
|
577
719
|
|
|
578
720
|
Each single write commits on its own for durability. Wrapping a bulk load in
|
|
579
721
|
`batch()` / `add_nodes` / `add_edges` collapses those per-call commits into one
|
|
@@ -584,7 +726,7 @@ thousands per second.
|
|
|
584
726
|
|
|
585
727
|
### Reads — fast, with an honest tail
|
|
586
728
|
|
|
587
|
-

|
|
588
730
|
|
|
589
731
|
Point lookups land in single-digit microseconds, and multi-node reads hydrate
|
|
590
732
|
the whole result set in a constant number of queries (no N+1 fan-out). The chart
|
|
@@ -598,7 +740,7 @@ SQLite engine runs under CPython, Node, and Bun, so the gap between them is pure
|
|
|
598
740
|
binding overhead — under 2×, and it doesn't even favor one runtime across
|
|
599
741
|
operations.
|
|
600
742
|
|
|
601
|
-

|
|
602
744
|
|
|
603
745
|
The lever that actually moved the needle was transaction batching (~10×, above),
|
|
604
746
|
not the language. Reproduce it with `python bench/runtimes/compare.py`.
|
|
@@ -608,7 +750,7 @@ not the language. Reproduce it with `python bench/runtimes/compare.py`.
|
|
|
608
750
|
We measured it against Neo4j — same graph, same queries, identical methodology
|
|
609
751
|
(full harness and reproduction in [`bench/neo4j/`](bench/neo4j/README.md)):
|
|
610
752
|
|
|
611
|
-

|
|
612
754
|
|
|
613
755
|
Queries compile to SQL over B-tree indexes, so each traversal hop is an index
|
|
614
756
|
lookup — wonderfully cheap for point reads and shallow traversals. An in-process
|
|
@@ -737,7 +879,18 @@ replayable.
|
|
|
737
879
|
| `kg rdf export [--format F]` | serialize to Turtle/N-Triples (RDF-star) |
|
|
738
880
|
| `kg rdf import FILE` | load RDF back in (gated + logged) |
|
|
739
881
|
| `kg ontology list` | list registered ontologies (the registry) |
|
|
740
|
-
| `kg ontology create NAME …` | register an ontology (`--backend`, `--stance`) |
|
|
882
|
+
| `kg ontology create NAME …` | register an ontology (`--backend`, `--stance`, `--shared-identity`) |
|
|
883
|
+
| `kg ontology delete NAME [--purge]` | deregister an ontology (`--purge` also deletes its data) |
|
|
884
|
+
| `kg fed schema [--samples]` | union vocabulary across ALL ontologies (multithreaded fan-out) |
|
|
885
|
+
| `kg fed stats` | node/edge totals across the federation |
|
|
886
|
+
| `kg fed nodes-by-kind KIND` / `nodes-by-label LABEL` | nodes across ontologies, tagged by source |
|
|
887
|
+
| `kg fed node ID` | find an id across the federation (identity-aware) |
|
|
888
|
+
| `kg link add FROM_ONT FROM TYPE TO_ONT TO` | cross-ontology edge (the backbone) |
|
|
889
|
+
| `kg link same-as FROM_ONT FROM TO_ONT TO` | assert two nodes are the same entity (symmetric) |
|
|
890
|
+
| `kg link of ONT ID` | cross-ontology links touching a node |
|
|
891
|
+
| `kg link cluster ONT ID` | the transitive SAME_AS identity cluster |
|
|
892
|
+
| `kg prefix add P IRI_BASE` | bind a CURIE prefix to an IRI base |
|
|
893
|
+
| `kg prefix expand CURIE` / `contract IRI` | CURIE ↔ IRI via the registry |
|
|
741
894
|
| `kg serve [--transport T]` | run the MCP server |
|
|
742
895
|
|
|
743
896
|
Add `--json` to any command for machine-readable output. Target a graph with
|
|
@@ -756,6 +909,8 @@ kgrdbms/
|
|
|
756
909
|
├── invariants.py # compiled-in invariants, checked before policy (no-op default)
|
|
757
910
|
├── service.py # the shared gated + logged write path
|
|
758
911
|
├── resolver.py # control plane: ontology name → (backend, events, entry) + the index
|
|
912
|
+
├── federation.py # cross-ontology reads: multithreaded fan-out, identity-aware merge
|
|
913
|
+
├── backbone.py # cross-ontology links + prefix/IRI registry (lives in the index graph)
|
|
759
914
|
├── backends/ # pluggable engine registry
|
|
760
915
|
│ ├── base.py # GraphBackend protocol + raising stub skeleton
|
|
761
916
|
│ ├── sqlite.py # live engine (adapter over Graph)
|
|
@@ -777,7 +932,7 @@ own. Everything else layers on top; `service.py` depends only on the
|
|
|
777
932
|
```bash
|
|
778
933
|
git clone <repo> && cd knowledge-graph-rdbms
|
|
779
934
|
uv venv && uv pip install -e ".[dev]"
|
|
780
|
-
pytest #
|
|
935
|
+
pytest # 107 tests
|
|
781
936
|
python bench/benchmark.py # benchmark with p50–p99 (see bench/README.md)
|
|
782
937
|
```
|
|
783
938
|
|