knowledge-graph-rdbms 0.1.0__tar.gz → 0.1.2__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- knowledge_graph_rdbms-0.1.2/.github/workflows/ci.yml +46 -0
- knowledge_graph_rdbms-0.1.2/.github/workflows/publish.yml +45 -0
- {knowledge_graph_rdbms-0.1.0 → knowledge_graph_rdbms-0.1.2}/PKG-INFO +76 -7
- {knowledge_graph_rdbms-0.1.0 → knowledge_graph_rdbms-0.1.2}/README.md +72 -6
- {knowledge_graph_rdbms-0.1.0 → knowledge_graph_rdbms-0.1.2}/kgrdbms/__init__.py +1 -1
- {knowledge_graph_rdbms-0.1.0 → knowledge_graph_rdbms-0.1.2}/kgrdbms/cli.py +71 -1
- {knowledge_graph_rdbms-0.1.0 → knowledge_graph_rdbms-0.1.2}/kgrdbms/mcp_server.py +61 -1
- knowledge_graph_rdbms-0.1.2/kgrdbms/rdf.py +647 -0
- {knowledge_graph_rdbms-0.1.0 → knowledge_graph_rdbms-0.1.2}/pyproject.toml +3 -2
- {knowledge_graph_rdbms-0.1.0 → knowledge_graph_rdbms-0.1.2}/tests/test_cli.py +42 -0
- knowledge_graph_rdbms-0.1.2/tests/test_rdf.py +185 -0
- {knowledge_graph_rdbms-0.1.0 → knowledge_graph_rdbms-0.1.2}/.claude/skills/kg-compose/SKILL.md +0 -0
- {knowledge_graph_rdbms-0.1.0 → knowledge_graph_rdbms-0.1.2}/.gitignore +0 -0
- {knowledge_graph_rdbms-0.1.0 → knowledge_graph_rdbms-0.1.2}/CLAUDE.md +0 -0
- {knowledge_graph_rdbms-0.1.0 → knowledge_graph_rdbms-0.1.2}/CODE_OF_CONDUCT.md +0 -0
- {knowledge_graph_rdbms-0.1.0 → knowledge_graph_rdbms-0.1.2}/CONTRIBUTING.md +0 -0
- {knowledge_graph_rdbms-0.1.0 → knowledge_graph_rdbms-0.1.2}/LICENSE +0 -0
- {knowledge_graph_rdbms-0.1.0 → knowledge_graph_rdbms-0.1.2}/SECURITY.md +0 -0
- {knowledge_graph_rdbms-0.1.0 → knowledge_graph_rdbms-0.1.2}/assets/crossover.png +0 -0
- {knowledge_graph_rdbms-0.1.0 → knowledge_graph_rdbms-0.1.2}/assets/read_latency.png +0 -0
- {knowledge_graph_rdbms-0.1.0 → knowledge_graph_rdbms-0.1.2}/assets/runtimes.png +0 -0
- {knowledge_graph_rdbms-0.1.0 → knowledge_graph_rdbms-0.1.2}/assets/write_throughput.png +0 -0
- {knowledge_graph_rdbms-0.1.0 → knowledge_graph_rdbms-0.1.2}/bench/README.md +0 -0
- {knowledge_graph_rdbms-0.1.0 → knowledge_graph_rdbms-0.1.2}/bench/benchmark.py +0 -0
- {knowledge_graph_rdbms-0.1.0 → knowledge_graph_rdbms-0.1.2}/bench/charts.py +0 -0
- {knowledge_graph_rdbms-0.1.0 → knowledge_graph_rdbms-0.1.2}/bench/neo4j/README.md +0 -0
- {knowledge_graph_rdbms-0.1.0 → knowledge_graph_rdbms-0.1.2}/bench/neo4j/headtohead.py +0 -0
- {knowledge_graph_rdbms-0.1.0 → knowledge_graph_rdbms-0.1.2}/bench/postgres/README.md +0 -0
- {knowledge_graph_rdbms-0.1.0 → knowledge_graph_rdbms-0.1.2}/bench/postgres/benchmark.py +0 -0
- {knowledge_graph_rdbms-0.1.0 → knowledge_graph_rdbms-0.1.2}/bench/postgres/charts.py +0 -0
- {knowledge_graph_rdbms-0.1.0 → knowledge_graph_rdbms-0.1.2}/bench/runtimes/compare.py +0 -0
- {knowledge_graph_rdbms-0.1.0 → knowledge_graph_rdbms-0.1.2}/bench/runtimes/run_bun.js +0 -0
- {knowledge_graph_rdbms-0.1.0 → knowledge_graph_rdbms-0.1.2}/bench/runtimes/run_node.mjs +0 -0
- {knowledge_graph_rdbms-0.1.0 → knowledge_graph_rdbms-0.1.2}/bench/runtimes/run_python.py +0 -0
- {knowledge_graph_rdbms-0.1.0 → knowledge_graph_rdbms-0.1.2}/kgrdbms/backends/__init__.py +0 -0
- {knowledge_graph_rdbms-0.1.0 → knowledge_graph_rdbms-0.1.2}/kgrdbms/backends/base.py +0 -0
- {knowledge_graph_rdbms-0.1.0 → knowledge_graph_rdbms-0.1.2}/kgrdbms/backends/neo4j.py +0 -0
- {knowledge_graph_rdbms-0.1.0 → knowledge_graph_rdbms-0.1.2}/kgrdbms/backends/postgres.py +0 -0
- {knowledge_graph_rdbms-0.1.0 → knowledge_graph_rdbms-0.1.2}/kgrdbms/backends/sqlite.py +0 -0
- {knowledge_graph_rdbms-0.1.0 → knowledge_graph_rdbms-0.1.2}/kgrdbms/events.py +0 -0
- {knowledge_graph_rdbms-0.1.0 → knowledge_graph_rdbms-0.1.2}/kgrdbms/graph.py +0 -0
- {knowledge_graph_rdbms-0.1.0 → knowledge_graph_rdbms-0.1.2}/kgrdbms/invariants.py +0 -0
- {knowledge_graph_rdbms-0.1.0 → knowledge_graph_rdbms-0.1.2}/kgrdbms/policy.py +0 -0
- {knowledge_graph_rdbms-0.1.0 → knowledge_graph_rdbms-0.1.2}/kgrdbms/resolver.py +0 -0
- {knowledge_graph_rdbms-0.1.0 → knowledge_graph_rdbms-0.1.2}/kgrdbms/service.py +0 -0
- {knowledge_graph_rdbms-0.1.0 → knowledge_graph_rdbms-0.1.2}/tests/test_bulk.py +0 -0
- {knowledge_graph_rdbms-0.1.0 → knowledge_graph_rdbms-0.1.2}/tests/test_events.py +0 -0
- {knowledge_graph_rdbms-0.1.0 → knowledge_graph_rdbms-0.1.2}/tests/test_graph.py +0 -0
- {knowledge_graph_rdbms-0.1.0 → knowledge_graph_rdbms-0.1.2}/tests/test_mcp_server.py +0 -0
- {knowledge_graph_rdbms-0.1.0 → knowledge_graph_rdbms-0.1.2}/tests/test_policy.py +0 -0
- {knowledge_graph_rdbms-0.1.0 → knowledge_graph_rdbms-0.1.2}/tests/test_postgres.py +0 -0
|
@@ -0,0 +1,46 @@
|
|
|
1
|
+
name: CI
|
|
2
|
+
|
|
3
|
+
# Runs the test suite on every push to master and every pull request.
|
|
4
|
+
# Make the `ci` job below a required status check in branch protection to
|
|
5
|
+
# gate merges on a green suite.
|
|
6
|
+
|
|
7
|
+
on:
|
|
8
|
+
push:
|
|
9
|
+
branches: [master]
|
|
10
|
+
pull_request:
|
|
11
|
+
branches: [master]
|
|
12
|
+
|
|
13
|
+
concurrency:
|
|
14
|
+
group: ci-${{ github.ref }}
|
|
15
|
+
cancel-in-progress: true
|
|
16
|
+
|
|
17
|
+
jobs:
|
|
18
|
+
test:
|
|
19
|
+
name: pytest (py${{ matrix.python-version }})
|
|
20
|
+
runs-on: ubuntu-latest
|
|
21
|
+
strategy:
|
|
22
|
+
fail-fast: false
|
|
23
|
+
matrix:
|
|
24
|
+
python-version: ["3.10", "3.11", "3.12"]
|
|
25
|
+
steps:
|
|
26
|
+
- uses: actions/checkout@v4
|
|
27
|
+
- uses: astral-sh/setup-uv@v5
|
|
28
|
+
- run: uv venv --python ${{ matrix.python-version }}
|
|
29
|
+
- run: uv pip install -e ".[dev]"
|
|
30
|
+
- run: uv run pytest -q
|
|
31
|
+
|
|
32
|
+
# Single stable check to require in branch protection — passes only if every
|
|
33
|
+
# matrix leg above passed (so you don't have to list each Python version).
|
|
34
|
+
ci:
|
|
35
|
+
name: ci
|
|
36
|
+
needs: test
|
|
37
|
+
runs-on: ubuntu-latest
|
|
38
|
+
if: always()
|
|
39
|
+
steps:
|
|
40
|
+
- name: Verify all test jobs passed
|
|
41
|
+
run: |
|
|
42
|
+
if [ "${{ needs.test.result }}" != "success" ]; then
|
|
43
|
+
echo "test matrix failed: ${{ needs.test.result }}"
|
|
44
|
+
exit 1
|
|
45
|
+
fi
|
|
46
|
+
echo "all test jobs passed"
|
|
@@ -0,0 +1,45 @@
|
|
|
1
|
+
name: Publish to PyPI
|
|
2
|
+
|
|
3
|
+
# Publishes on a version tag (e.g. `git tag v0.1.1 && git push --tags`).
|
|
4
|
+
# Auth is PyPI Trusted Publishing (OIDC) — no token stored anywhere.
|
|
5
|
+
# One-time setup at https://pypi.org/manage/account/publishing/ :
|
|
6
|
+
# PyPI project name: knowledge-graph-rdbms
|
|
7
|
+
# Owner: cunicopia-dev
|
|
8
|
+
# Repository: knowledge-graph-rdbms
|
|
9
|
+
# Workflow name: publish.yml
|
|
10
|
+
# Environment: pypi
|
|
11
|
+
#
|
|
12
|
+
# This workflow uses no event-provided input in run: steps (no injection surface).
|
|
13
|
+
|
|
14
|
+
on:
|
|
15
|
+
push:
|
|
16
|
+
tags: ["v*.*.*"]
|
|
17
|
+
workflow_dispatch: {}
|
|
18
|
+
|
|
19
|
+
jobs:
|
|
20
|
+
build:
|
|
21
|
+
name: Build sdist + wheel
|
|
22
|
+
runs-on: ubuntu-latest
|
|
23
|
+
steps:
|
|
24
|
+
- uses: actions/checkout@v4
|
|
25
|
+
- uses: astral-sh/setup-uv@v5
|
|
26
|
+
- run: uv build
|
|
27
|
+
- run: uvx twine check dist/*
|
|
28
|
+
- uses: actions/upload-artifact@v4
|
|
29
|
+
with:
|
|
30
|
+
name: dist
|
|
31
|
+
path: dist/
|
|
32
|
+
|
|
33
|
+
publish:
|
|
34
|
+
name: Publish (Trusted Publishing)
|
|
35
|
+
needs: build
|
|
36
|
+
runs-on: ubuntu-latest
|
|
37
|
+
environment: pypi
|
|
38
|
+
permissions:
|
|
39
|
+
id-token: write # mint the short-lived OIDC token PyPI verifies
|
|
40
|
+
steps:
|
|
41
|
+
- uses: actions/download-artifact@v4
|
|
42
|
+
with:
|
|
43
|
+
name: dist
|
|
44
|
+
path: dist/
|
|
45
|
+
- uses: pypa/gh-action-pypi-publish@release/v1
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: knowledge-graph-rdbms
|
|
3
|
-
Version: 0.1.0
|
|
3
|
+
Version: 0.1.2
|
|
4
4
|
Summary: A label property graph on an RDBMS (SQLite): nodes, typed edges, an append-only event log, and an optional MCP server.
|
|
5
5
|
Project-URL: Homepage, https://github.com/cunicopia-dev/knowledge-graph-rdbms
|
|
6
6
|
Project-URL: Repository, https://github.com/cunicopia-dev/knowledge-graph-rdbms
|
|
@@ -20,10 +20,13 @@ Provides-Extra: dev
|
|
|
20
20
|
Requires-Dist: mcp>=1.0; extra == 'dev'
|
|
21
21
|
Requires-Dist: psycopg[binary]>=3.1; extra == 'dev'
|
|
22
22
|
Requires-Dist: pytest>=7.0; extra == 'dev'
|
|
23
|
+
Requires-Dist: rdflib>=7.0; extra == 'dev'
|
|
23
24
|
Provides-Extra: mcp
|
|
24
25
|
Requires-Dist: mcp>=1.0; extra == 'mcp'
|
|
25
26
|
Provides-Extra: postgres
|
|
26
27
|
Requires-Dist: psycopg[binary]>=3.1; extra == 'postgres'
|
|
28
|
+
Provides-Extra: rdf
|
|
29
|
+
Requires-Dist: rdflib>=7.0; extra == 'rdf'
|
|
27
30
|
Description-Content-Type: text/markdown
|
|
28
31
|
|
|
29
32
|
# knowledge-graph-rdbms
|
|
@@ -31,7 +34,7 @@ Description-Content-Type: text/markdown
|
|
|
31
34
|

|
|
32
35
|

|
|
33
36
|

|
|
34
|
-

|
|
35
38
|

|
|
36
39
|

|
|
37
40
|
|
|
@@ -79,6 +82,7 @@ Small enough to hold in your head. Flexible enough to model anything.
|
|
|
79
82
|
- [Install](#install)
|
|
80
83
|
- [Quickstart](#quickstart)
|
|
81
84
|
- [Performance](#performance)
|
|
85
|
+
- [RDF interop: export, SPARQL, RDF-star](#rdf-interop-export-sparql-rdf-star)
|
|
82
86
|
- [Command reference](#command-reference)
|
|
83
87
|
- [Project layout](#project-layout)
|
|
84
88
|
- [Development](#development)
|
|
@@ -570,7 +574,8 @@ It exposes `kg_`-prefixed tools for reads (`kg_node_get`, `kg_nodes_by_kind`,
|
|
|
570
574
|
`kg_neighborhood`, `kg_shortest_path`, `kg_descendants`, …), gated writes
|
|
571
575
|
(`kg_node_upsert`, `kg_edge_add`, `kg_node_delete`, …), bulk composition
|
|
572
576
|
(`kg_import` — a whole `{nodes, edges}` batch in one call, so an agent populates
|
|
573
|
-
an ontology in a single tool call instead of dozens),
|
|
577
|
+
an ontology in a single tool call instead of dozens), RDF interop
|
|
578
|
+
(`kg_rdf_export`, `kg_rdf_import` — see below), and the event log
|
|
574
579
|
(`kg_events_tail`, `kg_event_revert`, `kg_replay`). Every write passes through
|
|
575
580
|
the invariants + policy gate and is recorded — same engine, same file as the
|
|
576
581
|
CLI. Every tool also takes an optional `ontology` name (omit for the default),
|
|
@@ -596,7 +601,7 @@ that data by `bench/charts.py`. Run both on your own machine in one command.
|
|
|
596
601
|
|
|
597
602
|
### Writes — the batching lever
|
|
598
603
|
|
|
599
|
-

|
|
604
|
+

|
|
600
605
|
|
|
601
606
|
Each single write commits on its own for durability. Wrapping a bulk load in
|
|
602
607
|
`batch()` / `add_nodes` / `add_edges` collapses those per-call commits into one
|
|
@@ -607,7 +612,7 @@ thousands per second.
|
|
|
607
612
|
|
|
608
613
|
### Reads — fast, with an honest tail
|
|
609
614
|
|
|
610
|
-

|
|
615
|
+

|
|
611
616
|
|
|
612
617
|
Point lookups land in single-digit microseconds, and multi-node reads hydrate
|
|
613
618
|
the whole result set in a constant number of queries (no N+1 fan-out). The chart
|
|
@@ -621,7 +626,7 @@ SQLite engine runs under CPython, Node, and Bun, so the gap between them is pure
|
|
|
621
626
|
binding overhead — under 2×, and it doesn't even favor one runtime across
|
|
622
627
|
operations.
|
|
623
628
|
|
|
624
|
-

|
|
629
|
+

|
|
625
630
|
|
|
626
631
|
The lever that actually moved the needle was transaction batching (~10×, above),
|
|
627
632
|
not the language. Reproduce it with `python bench/runtimes/compare.py`.
|
|
@@ -631,7 +636,7 @@ not the language. Reproduce it with `python bench/runtimes/compare.py`.
|
|
|
631
636
|
We measured it against Neo4j — same graph, same queries, identical methodology
|
|
632
637
|
(full harness and reproduction in [`bench/neo4j/`](bench/neo4j/README.md)):
|
|
633
638
|
|
|
634
|
-

|
|
639
|
+

|
|
635
640
|
|
|
636
641
|
Queries compile to SQL over B-tree indexes, so each traversal hop is an index
|
|
637
642
|
lookup — wonderfully cheap for point reads and shallow traversals. An in-process
|
|
@@ -672,6 +677,67 @@ ontology to it while the hot, shallow ones stay embedded.
|
|
|
672
677
|
|
|
673
678
|
---
|
|
674
679
|
|
|
680
|
+
## RDF interop: export, SPARQL, RDF-star
|
|
681
|
+
|
|
682
|
+
The store stays a label property graph. RDF is a **boundary** format here, not a
|
|
683
|
+
storage model — there is no triplestore, no OWL, no embedded SPARQL engine. The
|
|
684
|
+
project adopts exactly one RDF idea, because it is the only one expensive to
|
|
685
|
+
retrofit: **stable identity** (CURIE node ids). This is where a CURIE finally
|
|
686
|
+
expands into a real IRI.
|
|
687
|
+
|
|
688
|
+
```bash
|
|
689
|
+
kg rdf export # Turtle, to stdout (RDF-star edges)
|
|
690
|
+
kg rdf export --format ntriples --out graph.nt
|
|
691
|
+
kg rdf import graph.nt # back in — gated, logged, replayable
|
|
692
|
+
```
|
|
693
|
+
|
|
694
|
+
Export is **dependency-free**. Node ids round-trip as CURIEs, because the prefix
|
|
695
|
+
binding makes the IRI collapse back to exactly what you stored:
|
|
696
|
+
|
|
697
|
+
```turtle
|
|
698
|
+
@prefix person: <https://kg.local/person/> .
|
|
699
|
+
@prefix kg: <https://kg.local/vocab#> .
|
|
700
|
+
|
|
701
|
+
person:ada a kg:Person ;
|
|
702
|
+
kg:name "Ada Lovelace" ;
|
|
703
|
+
prop:born "1815"^^xsd:integer ;
|
|
704
|
+
rel:influences person:grace .
|
|
705
|
+
|
|
706
|
+
<< person:ada rel:influences person:grace >> prop:since "2020"^^xsd:integer .
|
|
707
|
+
```
|
|
708
|
+
|
|
709
|
+
That last line is the interesting one. A plain triple has nowhere to hang an
|
|
710
|
+
**edge's** properties; `--edge-strategy` decides how they cross:
|
|
711
|
+
|
|
712
|
+
| strategy | edge `{since: 2020}` becomes | when to use |
|
|
713
|
+
| ------------------ | ----------------------------------------------------- | -------------------------------------- |
|
|
714
|
+
| `rdf-star` (default) | `<< :ada :influences :grace >> :since 2020` | star-aware stores (Stardog, Jena 4.3+, Oxigraph) |
|
|
715
|
+
| `reification` | an `rdf:Statement` node carrying s/p/o + each property | rdflib and any RDF 1.1 tool |
|
|
716
|
+
| `lossy` | the bare triple; properties dropped (count reported) | you only want topology |
|
|
717
|
+
|
|
718
|
+
**SPARQL?** Yes — against the export, in any store. You don't embed a query
|
|
719
|
+
engine (that would betray the zero-dependency, no-SPARQL design); you emit a
|
|
720
|
+
graph a real engine can query:
|
|
721
|
+
|
|
722
|
+
```python
|
|
723
|
+
import rdflib
|
|
724
|
+
from kgrdbms import rdf
|
|
725
|
+
|
|
726
|
+
# rdflib is RDF 1.1 — no star — so emit reification for it:
|
|
727
|
+
ttl = rdf.export(graph, "turtle", rdf.IriContext(edge_strategy="reification"))
|
|
728
|
+
store = rdflib.Graph(); store.parse(data=ttl, format="turtle")
|
|
729
|
+
store.query("SELECT ?s ?o WHERE { ?s <https://kg.local/rel/influences> ?o }")
|
|
730
|
+
```
|
|
731
|
+
|
|
732
|
+
For SPARQL-**star** over the `rdf-star` export, hand the Turtle to a star-native
|
|
733
|
+
store (Stardog, Jena, Oxigraph, GraphDB). Turtle/foreign-RDF *import* needs the
|
|
734
|
+
optional extra (`pip install "knowledge-graph-rdbms[rdf]"`, which pulls
|
|
735
|
+
`rdflib`); N-Triples import and **all** export stay dependency-free. Imported RDF
|
|
736
|
+
rides the same gated, logged path as every other write, so it is audited and
|
|
737
|
+
replayable.
|
|
738
|
+
|
|
739
|
+
---
|
|
740
|
+
|
|
675
741
|
## Command reference
|
|
676
742
|
|
|
677
743
|
| Command | What it does |
|
|
@@ -695,6 +761,8 @@ ontology to it while the hot, shallow ones stay embedded.
|
|
|
695
761
|
| `kg revert EVENT_ID` | undo an event (compensating event) |
|
|
696
762
|
| `kg replay [--upto TS]` | rebuild the projection from the log |
|
|
697
763
|
| `kg import FILE` | bulk `{nodes, edges}` import (gated + logged) |
|
|
764
|
+
| `kg rdf export [--format F]` | serialize to Turtle/N-Triples (RDF-star) |
|
|
765
|
+
| `kg rdf import FILE` | load RDF back in (gated + logged) |
|
|
698
766
|
| `kg ontology list` | list registered ontologies (the registry) |
|
|
699
767
|
| `kg ontology create NAME …` | register an ontology (`--backend`, `--stance`) |
|
|
700
768
|
| `kg serve [--transport T]` | run the MCP server |
|
|
@@ -720,6 +788,7 @@ kgrdbms/
|
|
|
720
788
|
│ ├── sqlite.py # live engine (adapter over Graph)
|
|
721
789
|
│ ├── postgres.py # live engine (psycopg; jsonb + recursive CTEs); [postgres] extra
|
|
722
790
|
│ └── neo4j.py # stub (deep-traversal escalation)
|
|
791
|
+
├── rdf.py # RDF boundary: Turtle/N-Triples export + import (RDF-star); rdflib only for [rdf] import
|
|
723
792
|
├── cli.py # the `kg` command (stdlib argparse)
|
|
724
793
|
└── mcp_server.py # the MCP server (optional [mcp] extra)
|
|
725
794
|
```
|
|
@@ -3,7 +3,7 @@
|
|
|
3
3
|

|
|
4
4
|

|
|
5
5
|

|
|
6
|
-

|
|
7
7
|

|
|
8
8
|

|
|
9
9
|
|
|
@@ -51,6 +51,7 @@ Small enough to hold in your head. Flexible enough to model anything.
|
|
|
51
51
|
- [Install](#install)
|
|
52
52
|
- [Quickstart](#quickstart)
|
|
53
53
|
- [Performance](#performance)
|
|
54
|
+
- [RDF interop: export, SPARQL, RDF-star](#rdf-interop-export-sparql-rdf-star)
|
|
54
55
|
- [Command reference](#command-reference)
|
|
55
56
|
- [Project layout](#project-layout)
|
|
56
57
|
- [Development](#development)
|
|
@@ -542,7 +543,8 @@ It exposes `kg_`-prefixed tools for reads (`kg_node_get`, `kg_nodes_by_kind`,
|
|
|
542
543
|
`kg_neighborhood`, `kg_shortest_path`, `kg_descendants`, …), gated writes
|
|
543
544
|
(`kg_node_upsert`, `kg_edge_add`, `kg_node_delete`, …), bulk composition
|
|
544
545
|
(`kg_import` — a whole `{nodes, edges}` batch in one call, so an agent populates
|
|
545
|
-
an ontology in a single tool call instead of dozens),
|
|
546
|
+
an ontology in a single tool call instead of dozens), RDF interop
|
|
547
|
+
(`kg_rdf_export`, `kg_rdf_import` — see below), and the event log
|
|
546
548
|
(`kg_events_tail`, `kg_event_revert`, `kg_replay`). Every write passes through
|
|
547
549
|
the invariants + policy gate and is recorded — same engine, same file as the
|
|
548
550
|
CLI. Every tool also takes an optional `ontology` name (omit for the default),
|
|
@@ -568,7 +570,7 @@ that data by `bench/charts.py`. Run both on your own machine in one command.
|
|
|
568
570
|
|
|
569
571
|
### Writes — the batching lever
|
|
570
572
|
|
|
571
|
-

|
|
573
|
+

|
|
572
574
|
|
|
573
575
|
Each single write commits on its own for durability. Wrapping a bulk load in
|
|
574
576
|
`batch()` / `add_nodes` / `add_edges` collapses those per-call commits into one
|
|
@@ -579,7 +581,7 @@ thousands per second.
|
|
|
579
581
|
|
|
580
582
|
### Reads — fast, with an honest tail
|
|
581
583
|
|
|
582
|
-

|
|
584
|
+

|
|
583
585
|
|
|
584
586
|
Point lookups land in single-digit microseconds, and multi-node reads hydrate
|
|
585
587
|
the whole result set in a constant number of queries (no N+1 fan-out). The chart
|
|
@@ -593,7 +595,7 @@ SQLite engine runs under CPython, Node, and Bun, so the gap between them is pure
|
|
|
593
595
|
binding overhead — under 2×, and it doesn't even favor one runtime across
|
|
594
596
|
operations.
|
|
595
597
|
|
|
596
|
-

|
|
598
|
+

|
|
597
599
|
|
|
598
600
|
The lever that actually moved the needle was transaction batching (~10×, above),
|
|
599
601
|
not the language. Reproduce it with `python bench/runtimes/compare.py`.
|
|
@@ -603,7 +605,7 @@ not the language. Reproduce it with `python bench/runtimes/compare.py`.
|
|
|
603
605
|
We measured it against Neo4j — same graph, same queries, identical methodology
|
|
604
606
|
(full harness and reproduction in [`bench/neo4j/`](bench/neo4j/README.md)):
|
|
605
607
|
|
|
606
|
-

|
|
608
|
+

|
|
607
609
|
|
|
608
610
|
Queries compile to SQL over B-tree indexes, so each traversal hop is an index
|
|
609
611
|
lookup — wonderfully cheap for point reads and shallow traversals. An in-process
|
|
@@ -644,6 +646,67 @@ ontology to it while the hot, shallow ones stay embedded.
|
|
|
644
646
|
|
|
645
647
|
---
|
|
646
648
|
|
|
649
|
+
## RDF interop: export, SPARQL, RDF-star
|
|
650
|
+
|
|
651
|
+
The store stays a label property graph. RDF is a **boundary** format here, not a
|
|
652
|
+
storage model — there is no triplestore, no OWL, no embedded SPARQL engine. The
|
|
653
|
+
project adopts exactly one RDF idea, because it is the only one expensive to
|
|
654
|
+
retrofit: **stable identity** (CURIE node ids). This is where a CURIE finally
|
|
655
|
+
expands into a real IRI.
|
|
656
|
+
|
|
657
|
+
```bash
|
|
658
|
+
kg rdf export # Turtle, to stdout (RDF-star edges)
|
|
659
|
+
kg rdf export --format ntriples --out graph.nt
|
|
660
|
+
kg rdf import graph.nt # back in — gated, logged, replayable
|
|
661
|
+
```
|
|
662
|
+
|
|
663
|
+
Export is **dependency-free**. Node ids round-trip as CURIEs, because the prefix
|
|
664
|
+
binding makes the IRI collapse back to exactly what you stored:
|
|
665
|
+
|
|
666
|
+
```turtle
|
|
667
|
+
@prefix person: <https://kg.local/person/> .
|
|
668
|
+
@prefix kg: <https://kg.local/vocab#> .
|
|
669
|
+
|
|
670
|
+
person:ada a kg:Person ;
|
|
671
|
+
kg:name "Ada Lovelace" ;
|
|
672
|
+
prop:born "1815"^^xsd:integer ;
|
|
673
|
+
rel:influences person:grace .
|
|
674
|
+
|
|
675
|
+
<< person:ada rel:influences person:grace >> prop:since "2020"^^xsd:integer .
|
|
676
|
+
```
|
|
677
|
+
|
|
678
|
+
That last line is the interesting one. A plain triple has nowhere to hang an
|
|
679
|
+
**edge's** properties; `--edge-strategy` decides how they cross:
|
|
680
|
+
|
|
681
|
+
| strategy | edge `{since: 2020}` becomes | when to use |
|
|
682
|
+
| ------------------ | ----------------------------------------------------- | -------------------------------------- |
|
|
683
|
+
| `rdf-star` (default) | `<< :ada :influences :grace >> :since 2020` | star-aware stores (Stardog, Jena 4.3+, Oxigraph) |
|
|
684
|
+
| `reification` | an `rdf:Statement` node carrying s/p/o + each property | rdflib and any RDF 1.1 tool |
|
|
685
|
+
| `lossy` | the bare triple; properties dropped (count reported) | you only want topology |
|
|
686
|
+
|
|
687
|
+
**SPARQL?** Yes — against the export, in any store. You don't embed a query
|
|
688
|
+
engine (that would betray the zero-dependency, no-SPARQL design); you emit a
|
|
689
|
+
graph a real engine can query:
|
|
690
|
+
|
|
691
|
+
```python
|
|
692
|
+
import rdflib
|
|
693
|
+
from kgrdbms import rdf
|
|
694
|
+
|
|
695
|
+
# rdflib is RDF 1.1 — no star — so emit reification for it:
|
|
696
|
+
ttl = rdf.export(graph, "turtle", rdf.IriContext(edge_strategy="reification"))
|
|
697
|
+
store = rdflib.Graph(); store.parse(data=ttl, format="turtle")
|
|
698
|
+
store.query("SELECT ?s ?o WHERE { ?s <https://kg.local/rel/influences> ?o }")
|
|
699
|
+
```
|
|
700
|
+
|
|
701
|
+
For SPARQL-**star** over the `rdf-star` export, hand the Turtle to a star-native
|
|
702
|
+
store (Stardog, Jena, Oxigraph, GraphDB). Turtle/foreign-RDF *import* needs the
|
|
703
|
+
optional extra (`pip install "knowledge-graph-rdbms[rdf]"`, which pulls
|
|
704
|
+
`rdflib`); N-Triples import and **all** export stay dependency-free. Imported RDF
|
|
705
|
+
rides the same gated, logged path as every other write, so it is audited and
|
|
706
|
+
replayable.
|
|
707
|
+
|
|
708
|
+
---
|
|
709
|
+
|
|
647
710
|
## Command reference
|
|
648
711
|
|
|
649
712
|
| Command | What it does |
|
|
@@ -667,6 +730,8 @@ ontology to it while the hot, shallow ones stay embedded.
|
|
|
667
730
|
| `kg revert EVENT_ID` | undo an event (compensating event) |
|
|
668
731
|
| `kg replay [--upto TS]` | rebuild the projection from the log |
|
|
669
732
|
| `kg import FILE` | bulk `{nodes, edges}` import (gated + logged) |
|
|
733
|
+
| `kg rdf export [--format F]` | serialize to Turtle/N-Triples (RDF-star) |
|
|
734
|
+
| `kg rdf import FILE` | load RDF back in (gated + logged) |
|
|
670
735
|
| `kg ontology list` | list registered ontologies (the registry) |
|
|
671
736
|
| `kg ontology create NAME …` | register an ontology (`--backend`, `--stance`) |
|
|
672
737
|
| `kg serve [--transport T]` | run the MCP server |
|
|
@@ -692,6 +757,7 @@ kgrdbms/
|
|
|
692
757
|
│ ├── sqlite.py # live engine (adapter over Graph)
|
|
693
758
|
│ ├── postgres.py # live engine (psycopg; jsonb + recursive CTEs); [postgres] extra
|
|
694
759
|
│ └── neo4j.py # stub (deep-traversal escalation)
|
|
760
|
+
├── rdf.py # RDF boundary: Turtle/N-Triples export + import (RDF-star); rdflib only for [rdf] import
|
|
695
761
|
├── cli.py # the `kg` command (stdlib argparse)
|
|
696
762
|
└── mcp_server.py # the MCP server (optional [mcp] extra)
|
|
697
763
|
```
|
|
@@ -24,7 +24,7 @@ import json
|
|
|
24
24
|
import sys
|
|
25
25
|
from typing import Any
|
|
26
26
|
|
|
27
|
-
from kgrdbms import __version__, resolver, service
|
|
27
|
+
from kgrdbms import __version__, rdf, resolver, service
|
|
28
28
|
from kgrdbms.events import EventLog
|
|
29
29
|
from kgrdbms.graph import Edge, Graph, Node
|
|
30
30
|
from kgrdbms.invariants import InvariantViolation
|
|
@@ -358,6 +358,53 @@ def cmd_import(app: App, args) -> int:
|
|
|
358
358
|
return 0
|
|
359
359
|
|
|
360
360
|
|
|
361
|
+
def cmd_rdf_export(app: App, args) -> int:
|
|
362
|
+
"""Serialize the whole graph to RDF (Turtle/N-Triples, RDF-star by default).
|
|
363
|
+
|
|
364
|
+
Export is dependency-free. `--edge-strategy` chooses how edge properties
|
|
365
|
+
cross: rdf-star (quoted triples, default), reification (rdf:Statement), or
|
|
366
|
+
lossy (bare triples — the dropped count is reported, never silent).
|
|
367
|
+
"""
|
|
368
|
+
ctx = rdf.IriContext(edge_strategy=args.edge_strategy)
|
|
369
|
+
triples = rdf.export_graph(app.graph, ctx)
|
|
370
|
+
if args.format in ("turtle", "ttl"):
|
|
371
|
+
text = rdf.to_turtle(triples, ctx)
|
|
372
|
+
else:
|
|
373
|
+
text = rdf.to_ntriples(triples)
|
|
374
|
+
dropped = getattr(triples, "dropped_edge_props", 0)
|
|
375
|
+
|
|
376
|
+
if args.out:
|
|
377
|
+
with open(args.out, "w", encoding="utf-8") as fh:
|
|
378
|
+
fh.write(text)
|
|
379
|
+
note = f" ({dropped} edge-property values dropped by lossy)" if dropped else ""
|
|
380
|
+
app.emit(
|
|
381
|
+
{"format": args.format, "triples": len(triples), "out": args.out, "dropped_edge_props": dropped},
|
|
382
|
+
f"wrote {len(triples):,} triples to {args.out}{note}",
|
|
383
|
+
)
|
|
384
|
+
elif app.as_json:
|
|
385
|
+
app.emit({"format": args.format, "triples": len(triples), "dropped_edge_props": dropped, "rdf": text})
|
|
386
|
+
else:
|
|
387
|
+
if dropped:
|
|
388
|
+
print(f"# note: {dropped} edge-property values dropped by lossy strategy", file=sys.stderr)
|
|
389
|
+
print(text, end="") # raw RDF to stdout, pipeable
|
|
390
|
+
return 0
|
|
391
|
+
|
|
392
|
+
|
|
393
|
+
def cmd_rdf_import(app: App, args) -> int:
|
|
394
|
+
"""Load RDF into the graph through the gated + logged path (replayable).
|
|
395
|
+
|
|
396
|
+
N-Triples import is dependency-free; Turtle import needs the 'rdf' extra
|
|
397
|
+
(rdflib). `--edge-strategy` must match how the RDF encoded its edges.
|
|
398
|
+
"""
|
|
399
|
+
with open(args.file, encoding="utf-8") as fh:
|
|
400
|
+
text = fh.read()
|
|
401
|
+
ctx = rdf.IriContext(edge_strategy=args.edge_strategy)
|
|
402
|
+
res = rdf.import_rdf(app.graph, app.events, text, fmt=args.format, ctx=ctx, actor=args.actor)
|
|
403
|
+
app.emit(res, f"imported {res['nodes_imported']:,} nodes and "
|
|
404
|
+
f"{res['edges_imported']:,} edges from {args.file} (gated + logged)")
|
|
405
|
+
return 0
|
|
406
|
+
|
|
407
|
+
|
|
361
408
|
def cmd_serve(app: App, args) -> int:
|
|
362
409
|
try:
|
|
363
410
|
from kgrdbms import mcp_server
|
|
@@ -505,6 +552,29 @@ def build_parser() -> argparse.ArgumentParser:
|
|
|
505
552
|
a.add_argument("--actor", default="cli-import")
|
|
506
553
|
a.set_defaults(func=cmd_import)
|
|
507
554
|
|
|
555
|
+
# ---- RDF boundary ----
|
|
556
|
+
rdfp = sub.add_parser("rdf", help="export/import RDF (Turtle / N-Triples, RDF-star)")
|
|
557
|
+
rdfsub = rdfp.add_subparsers(dest="action", required=True)
|
|
558
|
+
|
|
559
|
+
a = rdfsub.add_parser("export", help="serialize the graph to RDF (dependency-free)")
|
|
560
|
+
a.add_argument("--format", default="turtle", choices=["turtle", "ttl", "ntriples", "nt"],
|
|
561
|
+
help="turtle (default, human-readable) or ntriples (lossless, dep-free)")
|
|
562
|
+
a.add_argument("--edge-strategy", dest="edge_strategy", default="rdf-star",
|
|
563
|
+
choices=["rdf-star", "reification", "lossy"],
|
|
564
|
+
help="how edge properties cross the boundary (default: rdf-star)")
|
|
565
|
+
a.add_argument("--out", help="write to this file (default: stdout)")
|
|
566
|
+
a.set_defaults(func=cmd_rdf_export)
|
|
567
|
+
|
|
568
|
+
a = rdfsub.add_parser("import", help="load RDF into the graph (gated + logged)")
|
|
569
|
+
a.add_argument("file")
|
|
570
|
+
a.add_argument("--format", default="ntriples", choices=["ntriples", "nt", "turtle", "ttl"],
|
|
571
|
+
help="ntriples (default, dep-free) or turtle (needs the 'rdf' extra)")
|
|
572
|
+
a.add_argument("--edge-strategy", dest="edge_strategy", default="rdf-star",
|
|
573
|
+
choices=["rdf-star", "reification", "lossy"],
|
|
574
|
+
help="strategy the RDF was written with, so edges decode correctly")
|
|
575
|
+
a.add_argument("--actor", default="rdf-import")
|
|
576
|
+
a.set_defaults(func=cmd_rdf_import)
|
|
577
|
+
|
|
508
578
|
a = sub.add_parser("serve", help="run the MCP server (needs the 'mcp' extra)")
|
|
509
579
|
a.add_argument("--transport", default="stdio", choices=["stdio", "sse", "streamable-http"])
|
|
510
580
|
a.set_defaults(func=cmd_serve)
|
|
@@ -40,6 +40,10 @@ Tool surface (all prefixed kg_):
|
|
|
40
40
|
kg_import — bulk {nodes, edges} in ONE call (one batch; use this
|
|
41
41
|
to compose an ontology instead of N upsert calls)
|
|
42
42
|
|
|
43
|
+
rdf boundary
|
|
44
|
+
kg_rdf_export — serialize an ontology to Turtle/N-Triples (RDF-star)
|
|
45
|
+
kg_rdf_import — load RDF text back in (gated + logged, replayable)
|
|
46
|
+
|
|
43
47
|
event log
|
|
44
48
|
kg_events_tail — most recent events
|
|
45
49
|
kg_event_revert — reverse an event via a compensating event
|
|
@@ -55,7 +59,7 @@ from typing import Any
|
|
|
55
59
|
|
|
56
60
|
from mcp.server.fastmcp import FastMCP
|
|
57
61
|
|
|
58
|
-
from kgrdbms import resolver, service
|
|
62
|
+
from kgrdbms import rdf, resolver, service
|
|
59
63
|
from kgrdbms.graph import Edge, Node
|
|
60
64
|
from kgrdbms.resolver import Resolved
|
|
61
65
|
|
|
@@ -366,6 +370,62 @@ def kg_import(
|
|
|
366
370
|
return {"ontology": b.entry.name, **res}
|
|
367
371
|
|
|
368
372
|
|
|
373
|
+
# ---- RDF boundary: export / import ----------------------------------
|
|
374
|
+
|
|
375
|
+
|
|
376
|
+
@mcp.tool()
|
|
377
|
+
def kg_rdf_export(
|
|
378
|
+
format: str = "turtle",
|
|
379
|
+
edge_strategy: str = "rdf-star",
|
|
380
|
+
ontology: str | None = None,
|
|
381
|
+
) -> dict:
|
|
382
|
+
"""Serialize an ontology to RDF text (dependency-free).
|
|
383
|
+
|
|
384
|
+
Hand the result to any RDF store (Stardog, rdflib, Jena) to run SPARQL —
|
|
385
|
+
the LPG stays the store of record; RDF is just the boundary format.
|
|
386
|
+
|
|
387
|
+
format — "turtle" (human-readable, default) or "ntriples" (lossless)
|
|
388
|
+
edge_strategy — how edge properties cross: "rdf-star" (quoted triples,
|
|
389
|
+
default), "reification" (rdf:Statement), or "lossy"
|
|
390
|
+
(bare triples; dropped count returned, never silent)
|
|
391
|
+
|
|
392
|
+
Node ids round-trip as CURIEs (person:ada <-> <https://kg.local/person/ada>).
|
|
393
|
+
Returns {ontology, format, triples, dropped_edge_props, rdf}.
|
|
394
|
+
"""
|
|
395
|
+
b = _bundle(ontology)
|
|
396
|
+
ctx = rdf.IriContext(edge_strategy=edge_strategy)
|
|
397
|
+
triples = rdf.export_graph(b.backend, ctx)
|
|
398
|
+
text = rdf.to_turtle(triples, ctx) if format in ("turtle", "ttl") else rdf.to_ntriples(triples)
|
|
399
|
+
return {
|
|
400
|
+
"ontology": b.entry.name,
|
|
401
|
+
"format": format,
|
|
402
|
+
"triples": len(triples),
|
|
403
|
+
"dropped_edge_props": getattr(triples, "dropped_edge_props", 0),
|
|
404
|
+
"rdf": text,
|
|
405
|
+
}
|
|
406
|
+
|
|
407
|
+
|
|
408
|
+
@mcp.tool()
|
|
409
|
+
def kg_rdf_import(
|
|
410
|
+
text: str,
|
|
411
|
+
format: str = "ntriples",
|
|
412
|
+
edge_strategy: str = "rdf-star",
|
|
413
|
+
actor: str = "rdf-import",
|
|
414
|
+
ontology: str | None = None,
|
|
415
|
+
) -> dict:
|
|
416
|
+
"""Load RDF text into an ontology through the gated + logged path (replayable).
|
|
417
|
+
|
|
418
|
+
N-Triples import is dependency-free; Turtle import needs the [rdf] extra
|
|
419
|
+
(rdflib). `edge_strategy` must match how the RDF encoded its edges, so the
|
|
420
|
+
edge properties decode back onto the right edges. Returns
|
|
421
|
+
{ontology, nodes_imported, edges_imported}.
|
|
422
|
+
"""
|
|
423
|
+
b = _bundle(ontology)
|
|
424
|
+
ctx = rdf.IriContext(edge_strategy=edge_strategy)
|
|
425
|
+
res = rdf.import_rdf(b.backend, b.events, text, fmt=format, ctx=ctx, actor=actor)
|
|
426
|
+
return {"ontology": b.entry.name, **res}
|
|
427
|
+
|
|
428
|
+
|
|
369
429
|
# ---- event log: read + reversal + replay ----------------------------
|
|
370
430
|
|
|
371
431
|
|