knowledge-graph-rdbms 0.1.4__tar.gz → 0.1.6__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (59) hide show
  1. {knowledge_graph_rdbms-0.1.4 → knowledge_graph_rdbms-0.1.6}/.github/workflows/ci.yml +3 -3
  2. {knowledge_graph_rdbms-0.1.4 → knowledge_graph_rdbms-0.1.6}/CLAUDE.md +17 -1
  3. {knowledge_graph_rdbms-0.1.4 → knowledge_graph_rdbms-0.1.6}/PKG-INFO +176 -21
  4. {knowledge_graph_rdbms-0.1.4 → knowledge_graph_rdbms-0.1.6}/README.md +175 -20
  5. {knowledge_graph_rdbms-0.1.4 → knowledge_graph_rdbms-0.1.6}/kgrdbms/__init__.py +6 -1
  6. knowledge_graph_rdbms-0.1.6/kgrdbms/backbone.py +265 -0
  7. {knowledge_graph_rdbms-0.1.4 → knowledge_graph_rdbms-0.1.6}/kgrdbms/cli.py +255 -3
  8. knowledge_graph_rdbms-0.1.6/kgrdbms/federation.py +215 -0
  9. {knowledge_graph_rdbms-0.1.4 → knowledge_graph_rdbms-0.1.6}/kgrdbms/mcp_server.py +258 -85
  10. {knowledge_graph_rdbms-0.1.4 → knowledge_graph_rdbms-0.1.6}/kgrdbms/resolver.py +48 -0
  11. knowledge_graph_rdbms-0.1.6/kgrdbms/virtual.py +294 -0
  12. {knowledge_graph_rdbms-0.1.4 → knowledge_graph_rdbms-0.1.6}/pyproject.toml +1 -1
  13. knowledge_graph_rdbms-0.1.6/tests/test_backbone.py +69 -0
  14. {knowledge_graph_rdbms-0.1.4 → knowledge_graph_rdbms-0.1.6}/tests/test_cli.py +51 -0
  15. knowledge_graph_rdbms-0.1.6/tests/test_federation.py +84 -0
  16. {knowledge_graph_rdbms-0.1.4 → knowledge_graph_rdbms-0.1.6}/tests/test_mcp_server.py +52 -12
  17. knowledge_graph_rdbms-0.1.6/tests/test_virtual.py +124 -0
  18. knowledge_graph_rdbms-0.1.6/uv.lock +1984 -0
  19. {knowledge_graph_rdbms-0.1.4 → knowledge_graph_rdbms-0.1.6}/.claude/skills/kg-compose/SKILL.md +0 -0
  20. {knowledge_graph_rdbms-0.1.4 → knowledge_graph_rdbms-0.1.6}/.github/workflows/publish.yml +0 -0
  21. {knowledge_graph_rdbms-0.1.4 → knowledge_graph_rdbms-0.1.6}/.gitignore +0 -0
  22. {knowledge_graph_rdbms-0.1.4 → knowledge_graph_rdbms-0.1.6}/CODE_OF_CONDUCT.md +0 -0
  23. {knowledge_graph_rdbms-0.1.4 → knowledge_graph_rdbms-0.1.6}/CONTRIBUTING.md +0 -0
  24. {knowledge_graph_rdbms-0.1.4 → knowledge_graph_rdbms-0.1.6}/LICENSE +0 -0
  25. {knowledge_graph_rdbms-0.1.4 → knowledge_graph_rdbms-0.1.6}/SECURITY.md +0 -0
  26. {knowledge_graph_rdbms-0.1.4 → knowledge_graph_rdbms-0.1.6}/assets/crossover.png +0 -0
  27. {knowledge_graph_rdbms-0.1.4 → knowledge_graph_rdbms-0.1.6}/assets/read_latency.png +0 -0
  28. {knowledge_graph_rdbms-0.1.4 → knowledge_graph_rdbms-0.1.6}/assets/runtimes.png +0 -0
  29. {knowledge_graph_rdbms-0.1.4 → knowledge_graph_rdbms-0.1.6}/assets/write_throughput.png +0 -0
  30. {knowledge_graph_rdbms-0.1.4 → knowledge_graph_rdbms-0.1.6}/bench/README.md +0 -0
  31. {knowledge_graph_rdbms-0.1.4 → knowledge_graph_rdbms-0.1.6}/bench/benchmark.py +0 -0
  32. {knowledge_graph_rdbms-0.1.4 → knowledge_graph_rdbms-0.1.6}/bench/charts.py +0 -0
  33. {knowledge_graph_rdbms-0.1.4 → knowledge_graph_rdbms-0.1.6}/bench/neo4j/README.md +0 -0
  34. {knowledge_graph_rdbms-0.1.4 → knowledge_graph_rdbms-0.1.6}/bench/neo4j/headtohead.py +0 -0
  35. {knowledge_graph_rdbms-0.1.4 → knowledge_graph_rdbms-0.1.6}/bench/postgres/README.md +0 -0
  36. {knowledge_graph_rdbms-0.1.4 → knowledge_graph_rdbms-0.1.6}/bench/postgres/benchmark.py +0 -0
  37. {knowledge_graph_rdbms-0.1.4 → knowledge_graph_rdbms-0.1.6}/bench/postgres/charts.py +0 -0
  38. {knowledge_graph_rdbms-0.1.4 → knowledge_graph_rdbms-0.1.6}/bench/runtimes/compare.py +0 -0
  39. {knowledge_graph_rdbms-0.1.4 → knowledge_graph_rdbms-0.1.6}/bench/runtimes/run_bun.js +0 -0
  40. {knowledge_graph_rdbms-0.1.4 → knowledge_graph_rdbms-0.1.6}/bench/runtimes/run_node.mjs +0 -0
  41. {knowledge_graph_rdbms-0.1.4 → knowledge_graph_rdbms-0.1.6}/bench/runtimes/run_python.py +0 -0
  42. {knowledge_graph_rdbms-0.1.4 → knowledge_graph_rdbms-0.1.6}/kgrdbms/backends/__init__.py +0 -0
  43. {knowledge_graph_rdbms-0.1.4 → knowledge_graph_rdbms-0.1.6}/kgrdbms/backends/base.py +0 -0
  44. {knowledge_graph_rdbms-0.1.4 → knowledge_graph_rdbms-0.1.6}/kgrdbms/backends/neo4j.py +0 -0
  45. {knowledge_graph_rdbms-0.1.4 → knowledge_graph_rdbms-0.1.6}/kgrdbms/backends/postgres.py +0 -0
  46. {knowledge_graph_rdbms-0.1.4 → knowledge_graph_rdbms-0.1.6}/kgrdbms/backends/sqlite.py +0 -0
  47. {knowledge_graph_rdbms-0.1.4 → knowledge_graph_rdbms-0.1.6}/kgrdbms/events.py +0 -0
  48. {knowledge_graph_rdbms-0.1.4 → knowledge_graph_rdbms-0.1.6}/kgrdbms/graph.py +0 -0
  49. {knowledge_graph_rdbms-0.1.4 → knowledge_graph_rdbms-0.1.6}/kgrdbms/invariants.py +0 -0
  50. {knowledge_graph_rdbms-0.1.4 → knowledge_graph_rdbms-0.1.6}/kgrdbms/policy.py +0 -0
  51. {knowledge_graph_rdbms-0.1.4 → knowledge_graph_rdbms-0.1.6}/kgrdbms/rdf.py +0 -0
  52. {knowledge_graph_rdbms-0.1.4 → knowledge_graph_rdbms-0.1.6}/kgrdbms/service.py +0 -0
  53. {knowledge_graph_rdbms-0.1.4 → knowledge_graph_rdbms-0.1.6}/tests/test_bulk.py +0 -0
  54. {knowledge_graph_rdbms-0.1.4 → knowledge_graph_rdbms-0.1.6}/tests/test_events.py +0 -0
  55. {knowledge_graph_rdbms-0.1.4 → knowledge_graph_rdbms-0.1.6}/tests/test_graph.py +0 -0
  56. {knowledge_graph_rdbms-0.1.4 → knowledge_graph_rdbms-0.1.6}/tests/test_policy.py +0 -0
  57. {knowledge_graph_rdbms-0.1.4 → knowledge_graph_rdbms-0.1.6}/tests/test_postgres.py +0 -0
  58. {knowledge_graph_rdbms-0.1.4 → knowledge_graph_rdbms-0.1.6}/tests/test_rdf.py +0 -0
  59. {knowledge_graph_rdbms-0.1.4 → knowledge_graph_rdbms-0.1.6}/tests/test_schema.py +0 -0
@@ -1,14 +1,14 @@
1
1
  name: CI
2
2
 
3
- # Runs the test suite on every push to master and every pull request.
3
+ # Runs the test suite on every push to main and every pull request.
4
4
  # Make the `ci` job below a required status check in branch protection to
5
5
  # gate merges on a green suite.
6
6
 
7
7
  on:
8
8
  push:
9
- branches: [master]
9
+ branches: [main]
10
10
  pull_request:
11
- branches: [master]
11
+ branches: [main]
12
12
 
13
13
  concurrency:
14
14
  group: ci-${{ github.ref }}
@@ -26,6 +26,11 @@ kg schema --samples # + example ids and enum-like property
26
26
  kg ontology list # the registry (the "db of dbs")
27
27
  kg ontology create coffee --stance inferential # register a named ontology
28
28
  kg --ontology coffee node add drink:latte --kind Drink # route to it (resolver)
29
+ kg fed schema # union vocabulary across ALL ontologies (multithreaded fan-out)
30
+ kg fed node person:ada # find an id across the federation (identity-aware)
31
+ kg link add coffee drink:latte ENJOYED_BY people person:ada # cross-ontology edge (backbone)
32
+ kg link same-as people person:ada wiki person:ada-lovelace # assert same real-world entity
33
+ kg prefix add person https://kg.local/person/ # CURIE prefix -> IRI (identity backbone)
29
34
  kg --db /tmp/x.db node add a:1 --kind T # raw escape hatch: exact file, no registry
30
35
  kg serve # run the MCP server (needs [mcp] extra)
31
36
  ```
@@ -68,6 +73,15 @@ The engine above (`graph.py` + `service.py` + the log + the gate) operates on **
68
73
  - **The event log is decoupled from the backend.** `EventLog(store, projection=None)`: *store* is the SQLite that holds the log rows; *projection* is the `GraphBackend` that `compensate()`/replay apply to. They coincide for sqlite (`EventLog(graph)` — projection defaults to store, unchanged). For postgres the store is `resolver._ControlPlaneLogStore` (a `<root>/ontologies/<slug>/events.db` sidecar) and the projection is the `PostgresGraph` — so audit/replay/undo keep working with graph data in Postgres and history in SQLite. `apply_event` only calls `GraphBackend` methods, so it drives any backend. This store↔projection split is the seam to respect for *any* non-sqlite engine (neo4j next).
69
74
  - **New failure class:** routing to a stub engine raises `NotImplementedError`. Every front door's error handling must account for it (the CLI's `main()` already maps it to `unavailable: …` / exit 1).
70
75
 
76
+ ## Cross-ontology: federation (reads) + the backbone (links)
77
+
78
+ The control plane routes to **one** ontology per call. Two modules sit on top to work across **many** at once — and the split is deliberate: reads federate, writes go through a backbone.
79
+
80
+ - **`federation.py` — cross-ontology READS, multithreaded by default.** A federated read is a *fan-out*: open each member ontology in its own thread (own connection — SQLite/psycopg release the GIL during a query, so N ontologies read concurrently) and merge the results tagged by source. `Federation(names)` or `Federation.all()`; methods mirror the single-graph reads (`schema`, `stats`, `nodes_by_kind`, `nodes_by_label`, `node`) plus `identity()`. Each worker calls `resolver.resolve()` in its *own* thread, so no connection is ever shared across threads. `parallel=False` forces sequential for debugging. **Federation never writes** and never silently drops a member (a member that raises propagates).
81
+ - **`backbone.py` — cross-ontology LINKS + the prefix registry.** A leaf edge can't cross ontologies (its FK lives in one file). The backbone is where cross-ontology structure lives, and **it needs no new storage: it IS the index graph** (`<root>/index.db`) growing new kinds. A link becomes a `Ref` proxy node per endpoint (id = `<ontology>::<node_id>`, FK-satisfied inside the index) joined by an edge; the prefix registry becomes `Prefix` nodes. **Both go through the gated + logged `service` path against the index's own event log** — so cross-domain assertions (`link`, `same_as`) are audited, reversible, and replayable exactly like leaf data. `SAME_AS` is symmetric and traversed transitively by `identity_cluster()`.
82
+ - **Identity is opt-in per ontology.** `OntologyEntry.shared_identity` (default `False` = local): when `True`, federation treats same-CURIE nodes across such ontologies as the *same* entity and merges them in `Federation.node()`. Everyone else stays local — link explicitly via the backbone. This is the lightweight LPG answer to RDF's global-IRI identity: stay local by default, adopt shared identity surgically.
83
+ - **The qualified-id delimiter is `::`** (`coffee::drink:latte`), distinct from the single-colon CURIE so a node's own colons survive `unqualify()`. Don't reuse `:`.
84
+
71
85
  ## Layout
72
86
 
73
87
  ```
@@ -78,6 +92,8 @@ kgrdbms/
78
92
  ├── invariants.py # compiled-in invariants, run before policy (no-op default)
79
93
  ├── service.py # the shared gated + logged write path (all front doors use this)
80
94
  ├── resolver.py # control plane: name → (backend, events, entry); the ontology index
95
+ ├── federation.py # cross-ontology READS: multithreaded fan-out, identity-aware merge
96
+ ├── backbone.py # cross-ontology LINKS + prefix/IRI registry (lives in the index graph)
81
97
  ├── backends/ # pluggable data plane (engine registry)
82
98
  │ ├── base.py # GraphBackend Protocol + _StubBackend
83
99
  │ ├── __init__.py # registry: @backend(name), get_backend, available_backends
@@ -88,7 +104,7 @@ kgrdbms/
88
104
  └── mcp_server.py # MCP server, kg_-prefixed tools, each with optional `ontology=` (optional [mcp] extra)
89
105
  ```
90
106
 
91
- `graph.py` has no internal dependencies — everything else layers on top of it. Dependency direction: `graph` ← `events`/`backends` ← `resolver` ← `service`-callers (`cli`, `mcp_server`). `service.py` depends only on the `GraphBackend` surface, never a concrete engine. Public API is re-exported from `__init__.py`.
107
+ `graph.py` has no internal dependencies — everything else layers on top of it. Dependency direction: `graph` ← `events`/`backends` ← `resolver` ← `backbone`/`federation` ← `service`-callers (`cli`, `mcp_server`). `backbone` builds on `resolver` + `service` (the index is its store); `federation` builds on `resolver` (+ `backbone` for identity clusters). `service.py` depends only on the `GraphBackend` surface, never a concrete engine. Public API is re-exported from `__init__.py`.
92
108
 
93
109
  ## Node id convention (CURIEs)
94
110
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: knowledge-graph-rdbms
3
- Version: 0.1.4
3
+ Version: 0.1.6
4
4
  Summary: A label property graph on an RDBMS (SQLite): nodes, typed edges, an append-only event log, and an optional MCP server.
5
5
  Project-URL: Homepage, https://github.com/cunicopia-dev/knowledge-graph-rdbms
6
6
  Project-URL: Repository, https://github.com/cunicopia-dev/knowledge-graph-rdbms
@@ -31,11 +31,12 @@ Description-Content-Type: text/markdown
31
31
 
32
32
  # knowledge-graph-rdbms
33
33
 
34
+ ![PyPI](https://img.shields.io/pypi/v/knowledge-graph-rdbms?logo=pypi&logoColor=white&color=3775A9)
34
35
  ![Python](https://img.shields.io/badge/python-3.10%2B-3776AB?logo=python&logoColor=white)
35
36
  ![License: MIT](https://img.shields.io/badge/license-MIT-green)
36
37
  ![core dependencies: 0](https://img.shields.io/badge/core_dependencies-0-success)
37
- ![tests: 87 passing](https://img.shields.io/badge/tests-87_passing-brightgreen)
38
- ![storage: SQLite](https://img.shields.io/badge/storage-SQLite-003B57?logo=sqlite&logoColor=white)
38
+ ![tests: 107 passing](https://img.shields.io/badge/tests-107_passing-brightgreen)
39
+ ![storage: SQLite + Postgres](https://img.shields.io/badge/storage-SQLite_%2B_Postgres-003B57?logo=sqlite&logoColor=white)
39
40
  ![MCP](https://img.shields.io/badge/MCP-ready-FF6F00)
40
41
 
41
42
  **A knowledge graph for modeling _meaning_ — entities, the kinds of things they
@@ -77,6 +78,8 @@ Small enough to hold in your head. Flexible enough to model anything.
77
78
  - [The data model](#the-data-model)
78
79
  - [Architecture: three front doors, one engine](#architecture-three-front-doors-one-engine)
79
80
  - [Many ontologies: one control plane](#many-ontologies-one-control-plane)
81
+ - [Discovery: read the schema before you query](#discovery-read-the-schema-before-you-query)
82
+ - [Cross-ontology: federation and the backbone](#cross-ontology-federation-and-the-backbone)
80
83
  - [Event sourcing: the graph is a projection](#event-sourcing-the-graph-is-a-projection)
81
84
  - [The safety gate: invariants vs. policy](#the-safety-gate-invariants-vs-policy)
82
85
  - [Install](#install)
@@ -358,6 +361,94 @@ NAME` routes through the resolver (named, registered, multi-engine), while
358
361
 
359
362
  ---
360
363
 
364
+ ## Discovery: read the schema before you query
365
+
366
+ A graph you didn't build is opaque: which `kind`s exist? which edge types? what
367
+ property keys live on a `Person`? `schema()` answers all of it in **one read** —
368
+ the map to read before querying, rather than guessing with trial calls.
369
+
370
+ ```bash
371
+ kg schema # kinds, edge types, labels, and property keys per kind — with counts
372
+ kg schema --samples # + a few example ids per kind and the enum-like values a key takes
373
+ ```
374
+
375
+ What comes back is the *observed* vocabulary — a profile of what's actually in
376
+ the graph, not an enforced schema (the graph stays schemaless). The MCP tool
377
+ `kg_schema` carries an instruction to call it **first**, so an agent reads the map
378
+ before it moves. It's a plain read — pure `GROUP BY` aggregates, no gate — and
379
+ like every read it has a federated form (next) that unions the vocabulary across
380
+ many ontologies at once.
381
+
382
+ ---
383
+
384
+ ## Cross-ontology: federation and the backbone
385
+
386
+ The control plane routes to *one* ontology per call. Two layers sit on top to work
387
+ across *many* at once: **reads federate, writes go through a backbone.**
388
+
389
+ ```mermaid
390
+ flowchart TD
391
+ Q["kg fed node person:ada"] --> FED["federation.py<br/>multithreaded fan-out"]
392
+ FED -.->|own thread + connection| O1[("people")]
393
+ FED -.->|own thread + connection| O2[("papers")]
394
+ FED -.->|own thread + connection| O3[("coffee")]
395
+ O1 --> M["merge, tagged by source<br/>(identity-aware)"]
396
+ O2 --> M
397
+ O3 --> M
398
+
399
+ L["kg link same-as …"] --> BB["backbone.py"]
400
+ BB --> IDX[["index.db — the backbone<br/>Ref + Prefix nodes, SAME_AS edges<br/>gated + logged"]]
401
+ ```
402
+
403
+ ### Federation — cross-ontology reads, multithreaded
404
+
405
+ A federated read is a *fan-out*: open each member ontology in its own thread (its
406
+ own connection — SQLite releases the GIL during a query, so N ontologies read
407
+ **concurrently**) and merge the results tagged by source. There's no separate
408
+ "federated" API: every base read takes an optional `ontologies=[...]` scope.
409
+
410
+ ```python
411
+ from kgrdbms import Federation
412
+
413
+ fed = Federation(["people", "papers", "coffee"]) # or Federation.all()
414
+ fed.schema() # unioned vocabulary + a per-ontology breakdown
415
+ fed.nodes_by_kind("Person") # [Located(ontology, node), ...] — tagged by source
416
+ fed.node("person:ada") # every occurrence across worlds, identity-merged
417
+ ```
418
+
419
+ ```bash
420
+ kg fed schema # union vocabulary across ALL ontologies
421
+ kg fed node person:ada # find an id across the federation (identity-aware)
422
+ ```
423
+
424
+ Federation never writes and never silently drops a member (a member that raises
425
+ propagates); `parallel=False` forces sequential for debugging.
426
+
427
+ ### The backbone — cross-ontology links
428
+
429
+ A leaf edge can't cross ontologies: its foreign key lives in one file. The backbone
430
+ is where cross-ontology structure lives, and it needs **no new storage — it *is* the
431
+ index graph** growing new kinds. A link becomes a lightweight `Ref` proxy node per
432
+ endpoint (id `<ontology>::<node_id>`, FK-satisfied *inside* the index) joined by an
433
+ edge; the prefix registry becomes `Prefix` nodes. Both go through the **same gated +
434
+ logged `service` path**, so a cross-domain assertion is audited, reversible, and
435
+ replayable exactly like leaf data.
436
+
437
+ ```bash
438
+ kg link add coffee drink:latte ENJOYED_BY people person:ada # a typed cross-ontology edge
439
+ kg link same-as people person:ada wiki person:ada-lovelace # "same real-world entity" (symmetric)
440
+ kg link cluster people person:ada # the transitive SAME_AS cluster
441
+ kg prefix add person https://kg.local/person/ # CURIE prefix -> IRI (identity backbone)
442
+ ```
443
+
444
+ **Identity is opt-in per ontology.** By default identity is *local* — `person:ada`
445
+ in two ontologies are different nodes until you link them via the backbone. An
446
+ ontology created with `--shared-identity` opts into *global* identity, and federation
447
+ then treats same-CURIE nodes across such ontologies as the **same** entity and merges
448
+ them.
449
+
450
+ ---
451
+
361
452
  ## Event sourcing: the graph is a projection
362
453
 
363
454
  The graph you query is a cache. The **append-only event log is the source of
@@ -425,6 +516,55 @@ replay(graph, events, genesis=genesis, upto_ts=ts) # ...as of an instant
425
516
 
426
517
  ---
427
518
 
519
+ ## Virtual edges: relationships you don't store
520
+
521
+ Event sourcing is the right model for *curated facts* — but the wrong one for a
522
+ high-cardinality, machine-generated relationship layer that already lives, fresh
523
+ and authoritative, in some operational store. Mirroring 100k correlation edges
524
+ into the graph (and the log) every night is wasteful and instantly stale.
525
+
526
+ A **virtual edge** inverts that. The ontology stores only a *binding* — an edge
527
+ TYPE plus the SQL that resolves its instances against an external source. At
528
+ traversal time the resolver runs that query, parameterized by the node you're
529
+ standing on, and synthesizes the edges live. Zero copy, always current, one
530
+ source of truth — Ontology-Based Data Access in the graph's own terms.
531
+
532
+ ```python
533
+ # Bind CO_HELD_WITH to a query over the operational store (here, any DB-API source)
534
+ kg_virtual_edge_add(
535
+ edge_type="CO_HELD_WITH",
536
+ query="SELECT b AS to_id, shared FROM co_held WHERE a = ?", # '?' sqlite, '%s' postgres
537
+ dsn_env="OUROBOROS_DSN", # credentials by reference — never in the graph
538
+ source="id_slug", # company:NVDA -> bind "NVDA"
539
+ target_id_template="company:{value}",
540
+ prop_cols=["shared"], directions="both", ontology="market",
541
+ )
542
+
543
+ # now ordinary traversal unions stored + virtual edges; virtual ones carry _virtual:true
544
+ kg_edges("company:NVDA", direction="out", ontology="market")
545
+ # -> [{to: "company:AMD", type: "CO_HELD_WITH", properties: {shared: 12, _virtual: true}, …}]
546
+ ```
547
+
548
+ Two properties keep it safe and simple:
549
+
550
+ - **Read-only.** Virtual edges are never written, so they sidestep the whole
551
+ gated-write / event-log / compensation machinery. A binding is config; the
552
+ edges are a view. Nothing to invalidate, replay, or undo.
553
+ - **Parameterized, never interpolated.** The SQL template is operator-authored;
554
+ the per-node value is always *bound* through the driver, never formatted into
555
+ the string. Credentials ride `dsn_env` (an env-var name), so secrets stay out
556
+ of the graph and out of version control.
557
+
558
+ Bindings live as reserved-kind (`_VirtualEdge`) nodes *in the ontology*, so they
559
+ travel with it and version alongside the schema. This is the seam for a
560
+ schema-graph / data-graph split: keep the curated **schema** (types, contracts,
561
+ doctrine) in a portable SQLite ontology, and **virtualize the populated extension**
562
+ straight out of your system-of-record — no ETL, no drift.
563
+
564
+ Tools: `kg_virtual_edge_add`, `kg_virtual_edges_list`, `kg_virtual_edge_remove`.
565
+
566
+ ---
567
+
428
568
  ## The safety gate: invariants vs. policy
429
569
 
430
570
  When you expose the graph for live mutation — especially to an AI agent over
@@ -572,18 +712,20 @@ Or hand-edit a client config (e.g. Claude Desktop):
572
712
  { "mcpServers": { "kgrdbms": { "command": "kgrdbms-mcp" } } }
573
713
  ```
574
714
 
575
- It exposes `kg_`-prefixed tools for reads (`kg_schema` the vocabulary, meant to
576
- be called first; `kg_node_get`, `kg_nodes_by_kind`,
577
- `kg_neighborhood`, `kg_shortest_path`, `kg_descendants`, …), gated writes
578
- (`kg_node_upsert`, `kg_edge_add`, `kg_node_delete`, …), bulk composition
579
- (`kg_import` a whole `{nodes, edges}` batch in one call, so an agent populates
580
- an ontology in a single tool call instead of dozens), RDF interop
581
- (`kg_rdf_export`, `kg_rdf_import` — see below), and the event log
582
- (`kg_events_tail`, `kg_event_revert`, `kg_replay`). Every write passes through
583
- the invariants + policy gate and is recorded — same engine, same file as the
584
- CLI. Every tool also takes an optional `ontology` name (omit for the default),
585
- and `kg_ontologies_list` / `kg_ontology_create` manage the registryso an
586
- agent can discover, create, and route between ontologies entirely over MCP.
715
+ It exposes `kg_`-prefixed tools over one engine. Reads — `kg_schema` (the
716
+ vocabulary, meant to be called first), `kg_node_get`, `kg_find` (by kind and/or
717
+ label), `kg_edges`, `kg_neighborhood`, `kg_shortest_path`, `kg_descendants` — each
718
+ take an optional `ontologies=[...]` to fan out across many ontologies in one
719
+ call. Then gated writes (`kg_node_upsert`, `kg_edge_add`, `kg_edge_remove`,
720
+ `kg_node_delete`), bulk composition (`kg_import` a whole `{nodes, edges}` batch in
721
+ one call, so an agent populates an ontology in a single tool call instead of dozens),
722
+ the cross-ontology backbone (`kg_link`, `kg_links_of`, `kg_identity`, `kg_prefix_add`,
723
+ `kg_prefix_resolve`), RDF interop (`kg_rdf_export`, `kg_rdf_import` — see below), and
724
+ the event log (`kg_events_tail`, `kg_event_revert`, `kg_replay`). Every write passes
725
+ the invariants + policy gate and is recorded — same engine, same file as the CLI.
726
+ Every tool takes an optional `ontology` name, and `kg_ontologies_list` /
727
+ `kg_ontology_create` / `kg_ontology_delete` manage the registry — so an agent can
728
+ discover, create, route between, and delete ontologies entirely over MCP.
587
729
 
588
730
  ---
589
731
 
@@ -604,7 +746,7 @@ that data by `bench/charts.py`. Run both on your own machine in one command.
604
746
 
605
747
  ### Writes — the batching lever
606
748
 
607
- ![Write throughput — batch the commit, ~10× faster](https://raw.githubusercontent.com/cunicopia-dev/knowledge-graph-rdbms/master/assets/write_throughput.png)
749
+ ![Write throughput — batch the commit, ~10× faster](https://raw.githubusercontent.com/cunicopia-dev/knowledge-graph-rdbms/main/assets/write_throughput.png)
608
750
 
609
751
  Each single write commits on its own for durability. Wrapping a bulk load in
610
752
  `batch()` / `add_nodes` / `add_edges` collapses those per-call commits into one
@@ -615,7 +757,7 @@ thousands per second.
615
757
 
616
758
  ### Reads — fast, with an honest tail
617
759
 
618
- ![Read latency — p50 marker, whisker to p99, log scale](https://raw.githubusercontent.com/cunicopia-dev/knowledge-graph-rdbms/master/assets/read_latency.png)
760
+ ![Read latency — p50 marker, whisker to p99, log scale](https://raw.githubusercontent.com/cunicopia-dev/knowledge-graph-rdbms/main/assets/read_latency.png)
619
761
 
620
762
  Point lookups land in single-digit microseconds, and multi-node reads hydrate
621
763
  the whole result set in a constant number of queries (no N+1 fan-out). The chart
@@ -629,7 +771,7 @@ SQLite engine runs under CPython, Node, and Bun, so the gap between them is pure
629
771
  binding overhead — under 2×, and it doesn't even favor one runtime across
630
772
  operations.
631
773
 
632
- ![Same SQLite across CPython, Node, and Bun](https://raw.githubusercontent.com/cunicopia-dev/knowledge-graph-rdbms/master/assets/runtimes.png)
774
+ ![Same SQLite across CPython, Node, and Bun](https://raw.githubusercontent.com/cunicopia-dev/knowledge-graph-rdbms/main/assets/runtimes.png)
633
775
 
634
776
  The lever that actually moved the needle was transaction batching (~10×, above),
635
777
  not the language. Reproduce it with `python bench/runtimes/compare.py`.
@@ -639,7 +781,7 @@ not the language. Reproduce it with `python bench/runtimes/compare.py`.
639
781
  We measured it against Neo4j — same graph, same queries, identical methodology
640
782
  (full harness and reproduction in [`bench/neo4j/`](bench/neo4j/README.md)):
641
783
 
642
- ![Where the crossover is — kgrdbms vs Neo4j](https://raw.githubusercontent.com/cunicopia-dev/knowledge-graph-rdbms/master/assets/crossover.png)
784
+ ![Where the crossover is — kgrdbms vs Neo4j](https://raw.githubusercontent.com/cunicopia-dev/knowledge-graph-rdbms/main/assets/crossover.png)
643
785
 
644
786
  Queries compile to SQL over B-tree indexes, so each traversal hop is an index
645
787
  lookup — wonderfully cheap for point reads and shallow traversals. An in-process
@@ -768,7 +910,18 @@ replayable.
768
910
  | `kg rdf export [--format F]` | serialize to Turtle/N-Triples (RDF-star) |
769
911
  | `kg rdf import FILE` | load RDF back in (gated + logged) |
770
912
  | `kg ontology list` | list registered ontologies (the registry) |
771
- | `kg ontology create NAME …` | register an ontology (`--backend`, `--stance`) |
913
+ | `kg ontology create NAME …` | register an ontology (`--backend`, `--stance`, `--shared-identity`) |
914
+ | `kg ontology delete NAME [--purge]` | deregister an ontology (`--purge` also deletes its data) |
915
+ | `kg fed schema [--samples]` | union vocabulary across ALL ontologies (multithreaded fan-out) |
916
+ | `kg fed stats` | node/edge totals across the federation |
917
+ | `kg fed nodes-by-kind KIND` / `nodes-by-label LABEL` | nodes across ontologies, tagged by source |
918
+ | `kg fed node ID` | find an id across the federation (identity-aware) |
919
+ | `kg link add FROM_ONT FROM TYPE TO_ONT TO` | cross-ontology edge (the backbone) |
920
+ | `kg link same-as A_ONT A_ID B_ONT B_ID` | assert two nodes are the same entity (symmetric) |
921
+ | `kg link of ONT ID` | cross-ontology links touching a node |
922
+ | `kg link cluster ONT ID` | the transitive SAME_AS identity cluster |
923
+ | `kg prefix add P IRI_BASE` | bind a CURIE prefix to an IRI base |
924
+ | `kg prefix expand CURIE` / `contract IRI` | CURIE ↔ IRI via the registry |
772
925
  | `kg serve [--transport T]` | run the MCP server |
773
926
 
774
927
  Add `--json` to any command for machine-readable output. Target a graph with
@@ -787,6 +940,8 @@ kgrdbms/
787
940
  ├── invariants.py # compiled-in invariants, checked before policy (no-op default)
788
941
  ├── service.py # the shared gated + logged write path
789
942
  ├── resolver.py # control plane: ontology name → (backend, events, entry) + the index
943
+ ├── federation.py # cross-ontology reads: multithreaded fan-out, identity-aware merge
944
+ ├── backbone.py # cross-ontology links + prefix/IRI registry (lives in the index graph)
790
945
  ├── backends/ # pluggable engine registry
791
946
  │ ├── base.py # GraphBackend protocol + raising stub skeleton
792
947
  │ ├── sqlite.py # live engine (adapter over Graph)
@@ -808,7 +963,7 @@ own. Everything else layers on top; `service.py` depends only on the
808
963
  ```bash
809
964
  git clone <repo> && cd knowledge-graph-rdbms
810
965
  uv venv && uv pip install -e ".[dev]"
811
- pytest # 62 tests
966
+ pytest # 107 tests
812
967
  python bench/benchmark.py # benchmark with p50–p99 (see bench/README.md)
813
968
  ```
814
969
 
@@ -1,10 +1,11 @@
1
1
  # knowledge-graph-rdbms
2
2
 
3
+ ![PyPI](https://img.shields.io/pypi/v/knowledge-graph-rdbms?logo=pypi&logoColor=white&color=3775A9)
3
4
  ![Python](https://img.shields.io/badge/python-3.10%2B-3776AB?logo=python&logoColor=white)
4
5
  ![License: MIT](https://img.shields.io/badge/license-MIT-green)
5
6
  ![core dependencies: 0](https://img.shields.io/badge/core_dependencies-0-success)
6
- ![tests: 87 passing](https://img.shields.io/badge/tests-87_passing-brightgreen)
7
- ![storage: SQLite](https://img.shields.io/badge/storage-SQLite-003B57?logo=sqlite&logoColor=white)
7
+ ![tests: 107 passing](https://img.shields.io/badge/tests-107_passing-brightgreen)
8
+ ![storage: SQLite + Postgres](https://img.shields.io/badge/storage-SQLite_%2B_Postgres-003B57?logo=sqlite&logoColor=white)
8
9
  ![MCP](https://img.shields.io/badge/MCP-ready-FF6F00)
9
10
 
10
11
  **A knowledge graph for modeling _meaning_ — entities, the kinds of things they
@@ -46,6 +47,8 @@ Small enough to hold in your head. Flexible enough to model anything.
46
47
  - [The data model](#the-data-model)
47
48
  - [Architecture: three front doors, one engine](#architecture-three-front-doors-one-engine)
48
49
  - [Many ontologies: one control plane](#many-ontologies-one-control-plane)
50
+ - [Discovery: read the schema before you query](#discovery-read-the-schema-before-you-query)
51
+ - [Cross-ontology: federation and the backbone](#cross-ontology-federation-and-the-backbone)
49
52
  - [Event sourcing: the graph is a projection](#event-sourcing-the-graph-is-a-projection)
50
53
  - [The safety gate: invariants vs. policy](#the-safety-gate-invariants-vs-policy)
51
54
  - [Install](#install)
@@ -327,6 +330,94 @@ NAME` routes through the resolver (named, registered, multi-engine), while
327
330
 
328
331
  ---
329
332
 
333
+ ## Discovery: read the schema before you query
334
+
335
+ A graph you didn't build is opaque: which `kind`s exist? which edge types? what
336
+ property keys live on a `Person`? `schema()` answers all of it in **one read** —
337
+ the map to read before querying, rather than guessing with trial calls.
338
+
339
+ ```bash
340
+ kg schema # kinds, edge types, labels, and property keys per kind — with counts
341
+ kg schema --samples # + a few example ids per kind and the enum-like values a key takes
342
+ ```
343
+
344
+ What comes back is the *observed* vocabulary — a profile of what's actually in
345
+ the graph, not an enforced schema (the graph stays schemaless). The MCP tool
346
+ `kg_schema` carries an instruction to call it **first**, so an agent reads the map
347
+ before it moves. It's a plain read — pure `GROUP BY` aggregates, no gate — and
348
+ like every read it has a federated form (next) that unions the vocabulary across
349
+ many ontologies at once.
350
+
351
+ ---
352
+
353
+ ## Cross-ontology: federation and the backbone
354
+
355
+ The control plane routes to *one* ontology per call. Two layers sit on top to work
356
+ across *many* at once: **reads federate, writes go through a backbone.**
357
+
358
+ ```mermaid
359
+ flowchart TD
360
+ Q["kg fed node person:ada"] --> FED["federation.py<br/>multithreaded fan-out"]
361
+ FED -.->|own thread + connection| O1[("people")]
362
+ FED -.->|own thread + connection| O2[("papers")]
363
+ FED -.->|own thread + connection| O3[("coffee")]
364
+ O1 --> M["merge, tagged by source<br/>(identity-aware)"]
365
+ O2 --> M
366
+ O3 --> M
367
+
368
+ L["kg link same-as …"] --> BB["backbone.py"]
369
+ BB --> IDX[["index.db — the backbone<br/>Ref + Prefix nodes, SAME_AS edges<br/>gated + logged"]]
370
+ ```
371
+
372
+ ### Federation — cross-ontology reads, multithreaded
373
+
374
+ A federated read is a *fan-out*: open each member ontology in its own thread (its
375
+ own connection — Python's `sqlite3` module releases the GIL while a query runs, so N ontologies read
376
+ **concurrently**) and merge the results tagged by source. There's no separate
377
+ "federated" API: every base read takes an optional `ontologies=[...]` scope.
378
+
379
+ ```python
380
+ from kgrdbms import Federation
381
+
382
+ fed = Federation(["people", "papers", "coffee"]) # or Federation.all()
383
+ fed.schema() # unioned vocabulary + a per-ontology breakdown
384
+ fed.nodes_by_kind("Person") # [Located(ontology, node), ...] — tagged by source
385
+ fed.node("person:ada") # every occurrence across worlds, identity-merged
386
+ ```
387
+
388
+ ```bash
389
+ kg fed schema # union vocabulary across ALL ontologies
390
+ kg fed node person:ada # find an id across the federation (identity-aware)
391
+ ```
392
+
393
+ Federation never writes and never silently drops a member (an exception raised by
394
+ any member propagates); `parallel=False` forces sequential execution for debugging.
395
+
396
+ ### The backbone — cross-ontology links
397
+
398
+ A leaf edge can't cross ontologies: its foreign key lives in one file. The backbone
399
+ is where cross-ontology structure lives, and it needs **no new storage — it *is* the
400
+ index graph** growing new kinds. A link becomes a lightweight `Ref` proxy node per
401
+ endpoint (id `<ontology>::<node_id>`, FK-satisfied *inside* the index) joined by an
402
+ edge; the prefix registry becomes `Prefix` nodes. Both go through the **same gated +
403
+ logged `service` path**, so a cross-domain assertion is audited, reversible, and
404
+ replayable exactly like leaf data.
405
+
406
+ ```bash
407
+ kg link add coffee drink:latte ENJOYED_BY people person:ada # a typed cross-ontology edge
408
+ kg link same-as people person:ada wiki person:ada-lovelace # "same real-world entity" (symmetric)
409
+ kg link cluster people person:ada # the transitive SAME_AS cluster
410
+ kg prefix add person https://kg.local/person/ # CURIE prefix -> IRI (identity backbone)
411
+ ```
412
+
413
+ **Identity is opt-in per ontology.** By default identity is *local* — `person:ada`
414
+ in two ontologies are different nodes until you link them via the backbone. An
415
+ ontology created with `--shared-identity` opts into *global* identity, and federation
416
+ then treats same-CURIE nodes across such ontologies as the **same** entity and merges
417
+ them.
418
+
419
+ ---
420
+
330
421
  ## Event sourcing: the graph is a projection
331
422
 
332
423
  The graph you query is a cache. The **append-only event log is the source of
@@ -394,6 +485,55 @@ replay(graph, events, genesis=genesis, upto_ts=ts) # ...as of an instant
394
485
 
395
486
  ---
396
487
 
488
+ ## Virtual edges: relationships you don't store
489
+
490
+ Event sourcing is the right model for *curated facts* — but the wrong one for a
491
+ high-cardinality, machine-generated relationship layer that already lives, fresh
492
+ and authoritative, in some operational store. Mirroring 100k correlation edges
493
+ into the graph (and the log) every night is wasteful and instantly stale.
494
+
495
+ A **virtual edge** inverts that. The ontology stores only a *binding* — an edge
496
+ TYPE plus the SQL that resolves its instances against an external source. At
497
+ traversal time the resolver runs that query, parameterized by the node you're
498
+ standing on, and synthesizes the edges live. Zero copy, always current, one
499
+ source of truth — Ontology-Based Data Access in the graph's own terms.
500
+
501
+ ```python
502
+ # Bind CO_HELD_WITH to a query over the operational store (here, any DB-API source)
503
+ kg_virtual_edge_add(
504
+ edge_type="CO_HELD_WITH",
505
+ query="SELECT b AS to_id, shared FROM co_held WHERE a = ?", # '?' sqlite, '%s' postgres
506
+ dsn_env="OUROBOROS_DSN", # credentials by reference — never in the graph
507
+ source="id_slug", # company:NVDA -> bind "NVDA"
508
+ target_id_template="company:{value}",
509
+ prop_cols=["shared"], directions="both", ontology="market",
510
+ )
511
+
512
+ # now ordinary traversal unions stored + virtual edges; virtual ones carry _virtual:true
513
+ kg_edges("company:NVDA", direction="out", ontology="market")
514
+ # -> [{to: "company:AMD", type: "CO_HELD_WITH", properties: {shared: 12, _virtual: true}, …}]
515
+ ```
516
+
517
+ Two properties keep it safe and simple:
518
+
519
+ - **Read-only.** Virtual edges are never written, so they sidestep the whole
520
+ gated-write / event-log / compensation machinery. A binding is config; the
521
+ edges are a view. Nothing to invalidate, replay, or undo.
522
+ - **Parameterized, never interpolated.** The SQL template is operator-authored;
523
+ the per-node value is always *bound* through the driver, never formatted into
524
+ the string. Credentials ride `dsn_env` (an env-var name), so secrets stay out
525
+ of the graph and out of version control.
526
+
527
+ Bindings live as reserved-kind (`_VirtualEdge`) nodes *in the ontology*, so they
528
+ travel with it and version alongside the schema. This is the seam for a
529
+ schema-graph / data-graph split: keep the curated **schema** (types, contracts,
530
+ doctrine) in a portable SQLite ontology, and **virtualize the populated extension**
531
+ straight out of your system-of-record — no ETL, no drift.
532
+
533
+ Tools: `kg_virtual_edge_add`, `kg_virtual_edges_list`, `kg_virtual_edge_remove`.
534
+
535
+ ---
536
+
397
537
  ## The safety gate: invariants vs. policy
398
538
 
399
539
  When you expose the graph for live mutation — especially to an AI agent over
@@ -541,18 +681,20 @@ Or hand-edit a client config (e.g. Claude Desktop):
541
681
  { "mcpServers": { "kgrdbms": { "command": "kgrdbms-mcp" } } }
542
682
  ```
543
683
 
544
- It exposes `kg_`-prefixed tools for reads (`kg_schema` the vocabulary, meant to
545
- be called first; `kg_node_get`, `kg_nodes_by_kind`,
546
- `kg_neighborhood`, `kg_shortest_path`, `kg_descendants`, …), gated writes
547
- (`kg_node_upsert`, `kg_edge_add`, `kg_node_delete`, …), bulk composition
548
- (`kg_import` a whole `{nodes, edges}` batch in one call, so an agent populates
549
- an ontology in a single tool call instead of dozens), RDF interop
550
- (`kg_rdf_export`, `kg_rdf_import` — see below), and the event log
551
- (`kg_events_tail`, `kg_event_revert`, `kg_replay`). Every write passes through
552
- the invariants + policy gate and is recorded — same engine, same file as the
553
- CLI. Every tool also takes an optional `ontology` name (omit for the default),
554
- and `kg_ontologies_list` / `kg_ontology_create` manage the registry — so an
555
- agent can discover, create, and route between ontologies entirely over MCP.
684
+ It exposes `kg_`-prefixed tools over one engine. Reads — `kg_schema` (the
685
+ vocabulary, meant to be called first), `kg_node_get`, `kg_find` (by kind and/or
686
+ label), `kg_edges`, `kg_neighborhood`, `kg_shortest_path`, `kg_descendants` — each
687
+ take an optional `ontologies=[...]` to fan out across many ontologies in one
688
+ call. Then gated writes (`kg_node_upsert`, `kg_edge_add`, `kg_edge_remove`,
689
+ `kg_node_delete`), bulk composition (`kg_import` a whole `{nodes, edges}` batch in
690
+ one call, so an agent populates an ontology in a single tool call instead of dozens),
691
+ the cross-ontology backbone (`kg_link`, `kg_links_of`, `kg_identity`, `kg_prefix_add`,
692
+ `kg_prefix_resolve`), RDF interop (`kg_rdf_export`, `kg_rdf_import` — see below), and
693
+ the event log (`kg_events_tail`, `kg_event_revert`, `kg_replay`). Every write passes
694
+ the invariants + policy gate and is recorded — same engine, same file as the CLI.
695
+ Every tool takes an optional `ontology` name, and `kg_ontologies_list` /
696
+ `kg_ontology_create` / `kg_ontology_delete` manage the registry — so an agent can
697
+ discover, create, route between, and delete ontologies entirely over MCP.
556
698
 
557
699
  ---
558
700
 
@@ -573,7 +715,7 @@ that data by `bench/charts.py`. Run both on your own machine in one command.
573
715
 
574
716
  ### Writes — the batching lever
575
717
 
576
- ![Write throughput — batch the commit, ~10× faster](https://raw.githubusercontent.com/cunicopia-dev/knowledge-graph-rdbms/master/assets/write_throughput.png)
718
+ ![Write throughput — batch the commit, ~10× faster](https://raw.githubusercontent.com/cunicopia-dev/knowledge-graph-rdbms/main/assets/write_throughput.png)
577
719
 
578
720
  Each single write commits on its own for durability. Wrapping a bulk load in
579
721
  `batch()` / `add_nodes` / `add_edges` collapses those per-call commits into one
@@ -584,7 +726,7 @@ thousands per second.
584
726
 
585
727
  ### Reads — fast, with an honest tail
586
728
 
587
- ![Read latency — p50 marker, whisker to p99, log scale](https://raw.githubusercontent.com/cunicopia-dev/knowledge-graph-rdbms/master/assets/read_latency.png)
729
+ ![Read latency — p50 marker, whisker to p99, log scale](https://raw.githubusercontent.com/cunicopia-dev/knowledge-graph-rdbms/main/assets/read_latency.png)
588
730
 
589
731
  Point lookups land in single-digit microseconds, and multi-node reads hydrate
590
732
  the whole result set in a constant number of queries (no N+1 fan-out). The chart
@@ -598,7 +740,7 @@ SQLite engine runs under CPython, Node, and Bun, so the gap between them is pure
598
740
  binding overhead — under 2×, and it doesn't even favor one runtime across
599
741
  operations.
600
742
 
601
- ![Same SQLite across CPython, Node, and Bun](https://raw.githubusercontent.com/cunicopia-dev/knowledge-graph-rdbms/master/assets/runtimes.png)
743
+ ![Same SQLite across CPython, Node, and Bun](https://raw.githubusercontent.com/cunicopia-dev/knowledge-graph-rdbms/main/assets/runtimes.png)
602
744
 
603
745
  The lever that actually moved the needle was transaction batching (~10×, above),
604
746
  not the language. Reproduce it with `python bench/runtimes/compare.py`.
@@ -608,7 +750,7 @@ not the language. Reproduce it with `python bench/runtimes/compare.py`.
608
750
  We measured it against Neo4j — same graph, same queries, identical methodology
609
751
  (full harness and reproduction in [`bench/neo4j/`](bench/neo4j/README.md)):
610
752
 
611
- ![Where the crossover is — kgrdbms vs Neo4j](https://raw.githubusercontent.com/cunicopia-dev/knowledge-graph-rdbms/master/assets/crossover.png)
753
+ ![Where the crossover is — kgrdbms vs Neo4j](https://raw.githubusercontent.com/cunicopia-dev/knowledge-graph-rdbms/main/assets/crossover.png)
612
754
 
613
755
  Queries compile to SQL over B-tree indexes, so each traversal hop is an index
614
756
  lookup — wonderfully cheap for point reads and shallow traversals. An in-process
@@ -737,7 +879,18 @@ replayable.
737
879
  | `kg rdf export [--format F]` | serialize to Turtle/N-Triples (RDF-star) |
738
880
  | `kg rdf import FILE` | load RDF back in (gated + logged) |
739
881
  | `kg ontology list` | list registered ontologies (the registry) |
740
- | `kg ontology create NAME …` | register an ontology (`--backend`, `--stance`) |
882
+ | `kg ontology create NAME …` | register an ontology (`--backend`, `--stance`, `--shared-identity`) |
883
+ | `kg ontology delete NAME [--purge]` | deregister an ontology (`--purge` also deletes its data) |
884
+ | `kg fed schema [--samples]` | union vocabulary across ALL ontologies (multithreaded fan-out) |
885
+ | `kg fed stats` | node/edge totals across the federation |
886
+ | `kg fed nodes-by-kind KIND` / `nodes-by-label LABEL` | nodes across ontologies, tagged by source |
887
+ | `kg fed node ID` | find an id across the federation (identity-aware) |
888
+ | `kg link add FROM_ONT FROM TYPE TO_ONT TO` | cross-ontology edge (the backbone) |
889
+ | `kg link same-as A FROM B TO` | assert two nodes are the same entity (symmetric) |
890
+ | `kg link of ONT ID` | cross-ontology links touching a node |
891
+ | `kg link cluster ONT ID` | the transitive SAME_AS identity cluster |
892
+ | `kg prefix add P IRI_BASE` | bind a CURIE prefix to an IRI base |
893
+ | `kg prefix expand CURIE` / `contract IRI` | CURIE ↔ IRI via the registry |
741
894
  | `kg serve [--transport T]` | run the MCP server |
742
895
 
743
896
  Add `--json` to any command for machine-readable output. Target a graph with
@@ -756,6 +909,8 @@ kgrdbms/
756
909
  ├── invariants.py # compiled-in invariants, checked before policy (no-op default)
757
910
  ├── service.py # the shared gated + logged write path
758
911
  ├── resolver.py # control plane: ontology name → (backend, events, entry) + the index
912
+ ├── federation.py # cross-ontology reads: multithreaded fan-out, identity-aware merge
913
+ ├── backbone.py # cross-ontology links + prefix/IRI registry (lives in the index graph)
759
914
  ├── backends/ # pluggable engine registry
760
915
  │ ├── base.py # GraphBackend protocol + raising stub skeleton
761
916
  │ ├── sqlite.py # live engine (adapter over Graph)
@@ -777,7 +932,7 @@ own. Everything else layers on top; `service.py` depends only on the
777
932
  ```bash
778
933
  git clone <repo> && cd knowledge-graph-rdbms
779
934
  uv venv && uv pip install -e ".[dev]"
780
- pytest # 62 tests
935
+ pytest # 107 tests
781
936
  python bench/benchmark.py # benchmark with p50–p99 (see bench/README.md)
782
937
  ```
783
938