apexgraph 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- apexgraph-0.1.0/.gitattributes +3 -0
- apexgraph-0.1.0/.github/workflows/ci.yml +35 -0
- apexgraph-0.1.0/.github/workflows/publish.yml +49 -0
- apexgraph-0.1.0/.gitignore +34 -0
- apexgraph-0.1.0/CHANGELOG.md +42 -0
- apexgraph-0.1.0/CONTRIBUTING.md +37 -0
- apexgraph-0.1.0/LICENSE +21 -0
- apexgraph-0.1.0/PKG-INFO +175 -0
- apexgraph-0.1.0/README.md +141 -0
- apexgraph-0.1.0/RELEASING.md +47 -0
- apexgraph-0.1.0/examples/README.md +164 -0
- apexgraph-0.1.0/examples/sample_graph.json +1395 -0
- apexgraph-0.1.0/examples/sample_project/__init__.py +1 -0
- apexgraph-0.1.0/examples/sample_project/api/__init__.py +1 -0
- apexgraph-0.1.0/examples/sample_project/api/routes.py +100 -0
- apexgraph-0.1.0/examples/sample_project/api/server.py +62 -0
- apexgraph-0.1.0/examples/sample_project/auth/__init__.py +1 -0
- apexgraph-0.1.0/examples/sample_project/auth/service.py +77 -0
- apexgraph-0.1.0/examples/sample_project/auth/session.py +68 -0
- apexgraph-0.1.0/examples/sample_project/db/__init__.py +1 -0
- apexgraph-0.1.0/examples/sample_project/db/models.py +82 -0
- apexgraph-0.1.0/examples/sample_project/db/pool.py +99 -0
- apexgraph-0.1.0/graphex/__init__.py +8 -0
- apexgraph-0.1.0/graphex/audit.py +111 -0
- apexgraph-0.1.0/graphex/benchmark.py +297 -0
- apexgraph-0.1.0/graphex/budget.py +336 -0
- apexgraph-0.1.0/graphex/cache.py +116 -0
- apexgraph-0.1.0/graphex/cli.py +675 -0
- apexgraph-0.1.0/graphex/diff.py +176 -0
- apexgraph-0.1.0/graphex/exporter.py +119 -0
- apexgraph-0.1.0/graphex/formatter.py +287 -0
- apexgraph-0.1.0/graphex/ignore.py +87 -0
- apexgraph-0.1.0/graphex/indexer/__init__.py +6 -0
- apexgraph-0.1.0/graphex/indexer/go.py +139 -0
- apexgraph-0.1.0/graphex/indexer/project.py +266 -0
- apexgraph-0.1.0/graphex/indexer/python.py +190 -0
- apexgraph-0.1.0/graphex/indexer/typescript.py +267 -0
- apexgraph-0.1.0/graphex/injector.py +207 -0
- apexgraph-0.1.0/graphex/loader.py +670 -0
- apexgraph-0.1.0/graphex/mcp.py +463 -0
- apexgraph-0.1.0/graphex/models.py +290 -0
- apexgraph-0.1.0/graphex/retrieval/__init__.py +6 -0
- apexgraph-0.1.0/graphex/retrieval/base.py +31 -0
- apexgraph-0.1.0/graphex/retrieval/bm25.py +223 -0
- apexgraph-0.1.0/graphex/retrieval/dense.py +81 -0
- apexgraph-0.1.0/graphex/retrieval/fusion.py +71 -0
- apexgraph-0.1.0/graphex/retrieval/ppr.py +225 -0
- apexgraph-0.1.0/graphex/scorer.py +113 -0
- apexgraph-0.1.0/graphex/viz.py +321 -0
- apexgraph-0.1.0/pyproject.toml +76 -0
- apexgraph-0.1.0/tests/__init__.py +0 -0
- apexgraph-0.1.0/tests/test_audit.py +113 -0
- apexgraph-0.1.0/tests/test_benchmark.py +163 -0
- apexgraph-0.1.0/tests/test_bm25.py +177 -0
- apexgraph-0.1.0/tests/test_budget.py +137 -0
- apexgraph-0.1.0/tests/test_cache.py +63 -0
- apexgraph-0.1.0/tests/test_cli.py +137 -0
- apexgraph-0.1.0/tests/test_dense.py +42 -0
- apexgraph-0.1.0/tests/test_diff.py +100 -0
- apexgraph-0.1.0/tests/test_exporter.py +85 -0
- apexgraph-0.1.0/tests/test_formatter.py +147 -0
- apexgraph-0.1.0/tests/test_fusion.py +45 -0
- apexgraph-0.1.0/tests/test_ignore.py +99 -0
- apexgraph-0.1.0/tests/test_indexer.py +325 -0
- apexgraph-0.1.0/tests/test_injector.py +124 -0
- apexgraph-0.1.0/tests/test_loader.py +349 -0
- apexgraph-0.1.0/tests/test_mcp.py +249 -0
- apexgraph-0.1.0/tests/test_ppr.py +187 -0
- apexgraph-0.1.0/tests/test_review_fixes.py +146 -0
- apexgraph-0.1.0/tests/test_scorer.py +122 -0
- apexgraph-0.1.0/tests/test_security.py +62 -0
- apexgraph-0.1.0/tests/test_viz.py +162 -0
- apexgraph-0.1.0/uv.lock +923 -0
|
@@ -0,0 +1,35 @@
|
|
|
1
|
+
name: CI
|
|
2
|
+
|
|
3
|
+
on:
|
|
4
|
+
push:
|
|
5
|
+
branches: [main]
|
|
6
|
+
pull_request:
|
|
7
|
+
branches: [main]
|
|
8
|
+
|
|
9
|
+
jobs:
|
|
10
|
+
test:
|
|
11
|
+
runs-on: ubuntu-latest
|
|
12
|
+
strategy:
|
|
13
|
+
fail-fast: false
|
|
14
|
+
matrix:
|
|
15
|
+
python-version: ["3.12", "3.13"]
|
|
16
|
+
|
|
17
|
+
steps:
|
|
18
|
+
- uses: actions/checkout@v4
|
|
19
|
+
|
|
20
|
+
- name: Install uv
|
|
21
|
+
uses: astral-sh/setup-uv@v5
|
|
22
|
+
with:
|
|
23
|
+
python-version: ${{ matrix.python-version }}
|
|
24
|
+
|
|
25
|
+
- name: Install dependencies
|
|
26
|
+
run: uv sync
|
|
27
|
+
|
|
28
|
+
- name: Lint (ruff)
|
|
29
|
+
run: uv run ruff check .
|
|
30
|
+
|
|
31
|
+
- name: Format check (black)
|
|
32
|
+
run: uv run black --check .
|
|
33
|
+
|
|
34
|
+
- name: Tests (pytest)
|
|
35
|
+
run: uv run pytest -q
|
|
@@ -0,0 +1,49 @@
|
|
|
1
|
+
name: Publish to PyPI
|
|
2
|
+
|
|
3
|
+
# Publishes to PyPI when a GitHub Release is published, using Trusted Publishing
|
|
4
|
+
# (OIDC) — no API token is stored anywhere. The PyPI project (distribution name
|
|
5
|
+
# "apexgraph") must have a matching trusted publisher configured (repo:
|
|
6
|
+
# alfonsomayoral/graphex, workflow: publish.yml, environment: pypi). See RELEASING.md.
|
|
7
|
+
|
|
8
|
+
on:
|
|
9
|
+
release:
|
|
10
|
+
types: [published]
|
|
11
|
+
|
|
12
|
+
permissions:
|
|
13
|
+
contents: read
|
|
14
|
+
|
|
15
|
+
jobs:
|
|
16
|
+
build:
|
|
17
|
+
runs-on: ubuntu-latest
|
|
18
|
+
steps:
|
|
19
|
+
- uses: actions/checkout@v4
|
|
20
|
+
- name: Install uv
|
|
21
|
+
uses: astral-sh/setup-uv@v5
|
|
22
|
+
with:
|
|
23
|
+
python-version: "3.12"
|
|
24
|
+
- name: Build sdist and wheel
|
|
25
|
+
run: uv build
|
|
26
|
+
- name: Validate metadata
|
|
27
|
+
run: uvx twine check dist/*
|
|
28
|
+
- name: Upload build artifacts
|
|
29
|
+
uses: actions/upload-artifact@v4
|
|
30
|
+
with:
|
|
31
|
+
name: dist
|
|
32
|
+
path: dist/
|
|
33
|
+
|
|
34
|
+
publish:
|
|
35
|
+
needs: build
|
|
36
|
+
runs-on: ubuntu-latest
|
|
37
|
+
environment:
|
|
38
|
+
name: pypi
|
|
39
|
+
url: https://pypi.org/p/apexgraph
|
|
40
|
+
permissions:
|
|
41
|
+
id-token: write # required for Trusted Publishing (OIDC)
|
|
42
|
+
steps:
|
|
43
|
+
- name: Download build artifacts
|
|
44
|
+
uses: actions/download-artifact@v4
|
|
45
|
+
with:
|
|
46
|
+
name: dist
|
|
47
|
+
path: dist/
|
|
48
|
+
- name: Publish to PyPI
|
|
49
|
+
uses: pypa/gh-action-pypi-publish@release/v1
|
|
@@ -0,0 +1,34 @@
|
|
|
1
|
+
# Python
|
|
2
|
+
__pycache__/
|
|
3
|
+
*.py[cod]
|
|
4
|
+
*.egg-info/
|
|
5
|
+
.eggs/
|
|
6
|
+
build/
|
|
7
|
+
dist/
|
|
8
|
+
*.egg
|
|
9
|
+
|
|
10
|
+
# Virtual envs
|
|
11
|
+
.venv/
|
|
12
|
+
venv/
|
|
13
|
+
env/
|
|
14
|
+
|
|
15
|
+
# uv
|
|
16
|
+
.uv/
|
|
17
|
+
|
|
18
|
+
# Tooling caches
|
|
19
|
+
.pytest_cache/
|
|
20
|
+
.ruff_cache/
|
|
21
|
+
.mypy_cache/
|
|
22
|
+
.coverage
|
|
23
|
+
htmlcov/
|
|
24
|
+
|
|
25
|
+
# Graphex sidecar cache + runtime artifacts
|
|
26
|
+
.graphex/
|
|
27
|
+
graphify-out/
|
|
28
|
+
|
|
29
|
+
# OS / editor
|
|
30
|
+
.DS_Store
|
|
31
|
+
Thumbs.db
|
|
32
|
+
.idea/
|
|
33
|
+
.vscode/
|
|
34
|
+
*.swp
|
|
@@ -0,0 +1,42 @@
|
|
|
1
|
+
# Changelog
|
|
2
|
+
|
|
3
|
+
All notable changes to this project are documented here. The format is based on
|
|
4
|
+
[Keep a Changelog](https://keepachangelog.com/en/1.1.0/), and this project
|
|
5
|
+
adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
|
|
6
|
+
|
|
7
|
+
## [0.1.0] - 2026-06-16
|
|
8
|
+
|
|
9
|
+
### Added
|
|
10
|
+
- Multi-format graph loader (graphify JSON, GraphML, Neo4j CSV) preserving
|
|
11
|
+
hyperedges, edge weight/confidence, communities and god nodes.
|
|
12
|
+
- BM25 lexical retriever with a cached inverted index and identifier-aware
|
|
13
|
+
tokenizer (camelCase / snake_case / PascalCase, compounds preserved).
|
|
14
|
+
- Personalized PageRank / random-walk-with-restart over weighted edges and
|
|
15
|
+
hyperedge cliques, plus query-independent global PageRank.
|
|
16
|
+
- Scorer fusing BM25-seeded PPR with an importance/god-node prior.
|
|
17
|
+
- Cost-aware MMR subgraph selection with a connectivity bonus and honest token
|
|
18
|
+
accounting (including injected source code); optional exact DP-knapsack mode.
|
|
19
|
+
- On-disk cache (`.graphex/`) for global PageRank and the BM25 index, invalidated
|
|
20
|
+
by content fingerprint.
|
|
21
|
+
- Static indexer for Python (`ast`), TypeScript/JavaScript (tree-sitter with a
|
|
22
|
+
regex fallback) and Go (regex), with incremental re-indexing by file hash.
|
|
23
|
+
- Markdown / JSON / YAML formatter and source-code injector.
|
|
24
|
+
- MCP stdio server exposing `graphex_query`, `graphex_explain`, `graphex_path`
|
|
25
|
+
and `graphex_stats`.
|
|
26
|
+
- Click CLI: `query`, `index`, `serve`, `stats`, `explain`, `path`, `diff`,
|
|
27
|
+
`export`, `benchmark`, `audit`, `init` — with autodiscovery, rich `--explain`,
|
|
28
|
+
and UTF-8 output on Windows.
|
|
29
|
+
- Context export for Claude / ChatGPT / CLAUDE.md, graph diffing, `.graphexignore`
|
|
30
|
+
filtering, a JSONL query audit log, and interactive HTML visualisation.
|
|
31
|
+
- Optional dense-embedding backend (OpenAI / Anthropic) behind the `[dense]` extra.
|
|
32
|
+
- Benchmark reporting recall@budget alongside token savings.
|
|
33
|
+
|
|
34
|
+
### Security
|
|
35
|
+
- Code injection (`--inject-code`) is contained to the project root: a crafted
|
|
36
|
+
`source_file` in an untrusted graph can no longer read arbitrary host files via
|
|
37
|
+
absolute paths or `..` traversal.
|
|
38
|
+
- The interactive visualisation loads vis-network from a pinned, immutable CDN
|
|
39
|
+
URL with a Subresource Integrity (SRI) hash, so a compromised CDN cannot inject
|
|
40
|
+
script into a generated page.
|
|
41
|
+
|
|
42
|
+
[0.1.0]: https://github.com/alfonsomayoral/graphex/releases/tag/v0.1.0
|
|
@@ -0,0 +1,37 @@
|
|
|
1
|
+
# Contributing to Graphex
|
|
2
|
+
|
|
3
|
+
Thanks for your interest in improving Graphex.
|
|
4
|
+
|
|
5
|
+
## Setup
|
|
6
|
+
|
|
7
|
+
```bash
|
|
8
|
+
git clone https://github.com/alfonsomayoral/graphex
|
|
9
|
+
cd graphex
|
|
10
|
+
uv sync
|
|
11
|
+
```
|
|
12
|
+
|
|
13
|
+
## Before you open a PR
|
|
14
|
+
|
|
15
|
+
```bash
|
|
16
|
+
uv run ruff check . # lint (must pass)
|
|
17
|
+
uv run black --check . # format (must pass)
|
|
18
|
+
uv run pytest # tests (must pass)
|
|
19
|
+
```
|
|
20
|
+
|
|
21
|
+
## Architecture at a glance
|
|
22
|
+
|
|
23
|
+
Each module has a single responsibility; the data contract lives in
|
|
24
|
+
`graphex/models.py` (`KnowledgeGraph`, `Node`, `Edge`, `Hyperedge`). The scoring
|
|
25
|
+
pipeline is `retrieval/bm25.py` → `retrieval/ppr.py` → `retrieval/fusion.py` →
|
|
26
|
+
`scorer.py`; selection is `budget.py`; the user surface is `cli.py` and `mcp.py`.
|
|
27
|
+
|
|
28
|
+
When you add a module, add a matching `tests/test_<module>.py`. Keep public
|
|
29
|
+
functions typed and documented, and keep new dependencies out of the default
|
|
30
|
+
install path — put optional features behind an extra in `pyproject.toml`.
|
|
31
|
+
|
|
32
|
+
## Guidelines
|
|
33
|
+
|
|
34
|
+
- Prefer reusing the helpers already in `models.py` and `retrieval/base.py`.
|
|
35
|
+
- The budget invariant is sacred: `tokens_used` must never exceed the requested
|
|
36
|
+
budget. Any selection change needs a test that proves it.
|
|
37
|
+
- Token-saving claims must be paired with a recall metric — see `benchmark.py`.
|
apexgraph-0.1.0/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 Alfonso Mayoral
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
apexgraph-0.1.0/PKG-INFO
ADDED
|
@@ -0,0 +1,175 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: apexgraph
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: Apex-relevance subgraph retrieval for AI agents. Feed your LLM the peak of your knowledge graph, sized to a token budget.
|
|
5
|
+
Project-URL: Homepage, https://github.com/alfonsomayoral/graphex
|
|
6
|
+
Project-URL: Repository, https://github.com/alfonsomayoral/graphex
|
|
7
|
+
Project-URL: Issues, https://github.com/alfonsomayoral/graphex/issues
|
|
8
|
+
Author-email: Alfonso Mayoral <alfonsomayoral29@gmail.com>
|
|
9
|
+
License: MIT
|
|
10
|
+
License-File: LICENSE
|
|
11
|
+
Keywords: ai-tools,bm25,cli,graphify,knowledge-graph,llm,mcp,pagerank,rag,token-budget
|
|
12
|
+
Classifier: Development Status :: 4 - Beta
|
|
13
|
+
Classifier: Intended Audience :: Developers
|
|
14
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
15
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
16
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
17
|
+
Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
|
|
18
|
+
Classifier: Topic :: Software Development :: Documentation
|
|
19
|
+
Requires-Python: >=3.12
|
|
20
|
+
Requires-Dist: click>=8.1
|
|
21
|
+
Requires-Dist: networkx>=3.2
|
|
22
|
+
Requires-Dist: pathspec>=0.12
|
|
23
|
+
Requires-Dist: rich>=13.7
|
|
24
|
+
Requires-Dist: tiktoken>=0.7
|
|
25
|
+
Provides-Extra: dense
|
|
26
|
+
Requires-Dist: anthropic>=0.34; extra == 'dense'
|
|
27
|
+
Requires-Dist: openai>=1.40; extra == 'dense'
|
|
28
|
+
Provides-Extra: ts
|
|
29
|
+
Requires-Dist: tree-sitter-typescript>=0.23; extra == 'ts'
|
|
30
|
+
Requires-Dist: tree-sitter>=0.22; extra == 'ts'
|
|
31
|
+
Provides-Extra: viz
|
|
32
|
+
Requires-Dist: watchdog>=4.0; extra == 'viz'
|
|
33
|
+
Description-Content-Type: text/markdown
|
|
34
|
+
|
|
35
|
+
<div align="center">
|
|
36
|
+
|
|
37
|
+
# Graphex
|
|
38
|
+
|
|
39
|
+
**Apex-relevance subgraph retrieval for AI agents.**
|
|
40
|
+
|
|
41
|
+
Feed your LLM the *peak* of your knowledge graph — sized to a token budget.
|
|
42
|
+
|
|
43
|
+
[![CI](https://github.com/alfonsomayoral/graphex/actions/workflows/ci.yml/badge.svg)](https://github.com/alfonsomayoral/graphex/actions/workflows/ci.yml)
|
|
44
|
+

|
|
45
|
+

|
|
46
|
+
|
|
47
|
+
</div>
|
|
48
|
+
|
|
49
|
+
---
|
|
50
|
+
|
|
51
|
+
Knowledge graphs grow large. When an agent needs context about one corner of a
|
|
52
|
+
codebase, dumping the whole graph into the prompt wastes tokens and money — and
|
|
53
|
+
buries the relevant nodes in noise. **Graphex scores every node against your
|
|
54
|
+
query and returns the most relevant, connected subgraph that fits within a token
|
|
55
|
+
budget**, ready to paste into a prompt or serve over MCP.
|
|
56
|
+
|
|
57
|
+
```bash
|
|
58
|
+
graphex index . # build a graph from your code (no LLM)
|
|
59
|
+
graphex "how does auth work" --budget 4000 # retrieve the apex subgraph
|
|
60
|
+
graphex serve # expose it to agents over MCP
|
|
61
|
+
```
|
|
62
|
+
|
|
63
|
+
Graphex reads the graphs produced by **graphify** and uses the rich signals
|
|
64
|
+
graphify emits — edge weights, confidence, hyperedges, communities, and god
|
|
65
|
+
nodes — that simpler tools throw away.
|
|
66
|
+
|
|
67
|
+
## Install
|
|
68
|
+
|
|
69
|
+
```bash
|
|
70
|
+
uv tool install apexgraph # or: pipx install apexgraph
|
|
71
|
+
# optional extras:
|
|
72
|
+
uv tool install "apexgraph[ts]" # better TypeScript indexing (tree-sitter)
|
|
73
|
+
uv tool install "apexgraph[dense]" # OpenAI/Anthropic embedding backend
|
|
74
|
+
```
|
|
75
|
+
|
|
76
|
+
The PyPI distribution is `apexgraph`; the command and import name are `graphex`.
|
|
77
|
+
Requires Python 3.12+.
|
|
78
|
+
|
|
79
|
+
## How it works
|
|
80
|
+
|
|
81
|
+
A five-stage pipeline, each stage a single-responsibility module:
|
|
82
|
+
|
|
83
|
+
```
|
|
84
|
+
load ─▶ score ─▶ select ─▶ inject ─▶ render
|
|
85
|
+
│ │ │ │ │
|
|
86
|
+
multi- BM25 → cost-aware source- markdown /
|
|
87
|
+
format PPR + MMR under code json / yaml
|
|
88
|
+
loader prior budget bodies
|
|
89
|
+
▲
|
|
90
|
+
index ───────────────┘ build a graph straight from code (no graphify)
|
|
91
|
+
```
|
|
92
|
+
|
|
93
|
+
**Relevance is one principled number, not a hand-tuned mix.** BM25 finds the
|
|
94
|
+
nodes the query is literally about; those seed a **Personalized PageRank** walk
|
|
95
|
+
that spreads relevance across the weighted graph (edge `weight × confidence`,
|
|
96
|
+
plus hyperedge cliques); a light importance/god-node prior nudges genuinely
|
|
97
|
+
central entities up. The query-independent half — global PageRank, the BM25
|
|
98
|
+
inverted index — is precomputed once and cached, invalidated by content hash, so
|
|
99
|
+
a query is just a lookup plus one walk.
|
|
100
|
+
|
|
101
|
+
**Selection is a budgeted knapsack, solved as one.** Picking the highest-value
|
|
102
|
+
set of nodes under a token ceiling is the 0/1 knapsack problem. Graphex selects
|
|
103
|
+
by *marginal value per token* and shapes the result with two terms — an MMR
|
|
104
|
+
penalty so it doesn't say the same thing twice, and a connectivity bonus so the
|
|
105
|
+
result is a coherent connected subgraph, not a bag of redundant islands. An exact
|
|
106
|
+
DP-knapsack mode is available for benchmarking the value ceiling.
|
|
107
|
+
|
|
108
|
+
**Token accounting is honest.** A node's cost is the size of its *final rendered
|
|
109
|
+
form*, including any injected source code — so `tokens_used` never lies and the
|
|
110
|
+
output never overflows the budget you asked for.
|
|
111
|
+
|
|
112
|
+
## Usage
|
|
113
|
+
|
|
114
|
+
```bash
|
|
115
|
+
# Index a project into a graphify-compatible graph.json (Python / TS / Go)
|
|
116
|
+
graphex index ./src -o graph.json
|
|
117
|
+
graphex index ./src --incremental # re-index only changed files
|
|
118
|
+
|
|
119
|
+
# Query (any unrecognised first arg routes here)
|
|
120
|
+
graphex "session token validation" -b 2000
|
|
121
|
+
graphex "auth flow" --explain # per-node BM25 / PPR / prior breakdown
|
|
122
|
+
graphex "auth flow" --inject-code # include real function bodies, still in budget
|
|
123
|
+
graphex "auth flow" --viz # interactive force-directed HTML
|
|
124
|
+
|
|
125
|
+
# Inspect (node ids come from your indexed graph; these match examples/)
|
|
126
|
+
graphex stats -g examples/sample_graph.json
|
|
127
|
+
graphex explain auth_service_login -g examples/sample_graph.json
|
|
128
|
+
graphex path auth_service auth_service_login -g examples/sample_graph.json
|
|
129
|
+
|
|
130
|
+
# Export a context block to paste into a system prompt / CLAUDE.md
|
|
131
|
+
graphex export "auth flow" -f claudemd -o CONTEXT.md
|
|
132
|
+
|
|
133
|
+
# Measure quality honestly (recall@budget, not just tokens saved)
|
|
134
|
+
graphex benchmark -q "auth flow" -q "db pooling" -b 1000 -b 4000
|
|
135
|
+
|
|
136
|
+
# Compare two graph versions and see the change impact
|
|
137
|
+
graphex diff old.json new.json --budget 2000
|
|
138
|
+
```
|
|
139
|
+
|
|
140
|
+
See [`examples/`](examples/) for a full walkthrough on a sample project.
|
|
141
|
+
|
|
142
|
+
## MCP server
|
|
143
|
+
|
|
144
|
+
Graphex speaks the Model Context Protocol over stdio (stdlib only, no SDK):
|
|
145
|
+
|
|
146
|
+
```bash
|
|
147
|
+
graphex serve --graph graph.json
|
|
148
|
+
```
|
|
149
|
+
|
|
150
|
+
It exposes four tools: `graphex_query`, `graphex_explain`, `graphex_path`,
|
|
151
|
+
`graphex_stats`. Register it with Claude Code:
|
|
152
|
+
|
|
153
|
+
```bash
|
|
154
|
+
claude mcp add graphex -- graphex serve --graph /abs/path/to/graph.json
|
|
155
|
+
```
|
|
156
|
+
|
|
157
|
+
## Honest benchmarking
|
|
158
|
+
|
|
159
|
+
"Tokens saved" is a vanity metric — a tool that returns nothing saves 100%.
|
|
160
|
+
Graphex reports **recall@budget** alongside it: how much of the relevant set the
|
|
161
|
+
budgeted subgraph actually captures. High savings with low recall means
|
|
162
|
+
under-retrieval, and the benchmark makes that trade-off visible.
|
|
163
|
+
|
|
164
|
+
## Development
|
|
165
|
+
|
|
166
|
+
```bash
|
|
167
|
+
uv sync
|
|
168
|
+
uv run pytest # test suite
|
|
169
|
+
uv run ruff check . # lint
|
|
170
|
+
uv run black . # format
|
|
171
|
+
```
|
|
172
|
+
|
|
173
|
+
## License
|
|
174
|
+
|
|
175
|
+
MIT © Alfonso Mayoral
|
|
@@ -0,0 +1,141 @@
|
|
|
1
|
+
<div align="center">
|
|
2
|
+
|
|
3
|
+
# Graphex
|
|
4
|
+
|
|
5
|
+
**Apex-relevance subgraph retrieval for AI agents.**
|
|
6
|
+
|
|
7
|
+
Feed your LLM the *peak* of your knowledge graph — sized to a token budget.
|
|
8
|
+
|
|
9
|
+
[![CI](https://github.com/alfonsomayoral/graphex/actions/workflows/ci.yml/badge.svg)](https://github.com/alfonsomayoral/graphex/actions/workflows/ci.yml)
|
|
10
|
+

|
|
11
|
+

|
|
12
|
+
|
|
13
|
+
</div>
|
|
14
|
+
|
|
15
|
+
---
|
|
16
|
+
|
|
17
|
+
Knowledge graphs grow large. When an agent needs context about one corner of a
|
|
18
|
+
codebase, dumping the whole graph into the prompt wastes tokens and money — and
|
|
19
|
+
buries the relevant nodes in noise. **Graphex scores every node against your
|
|
20
|
+
query and returns the most relevant, connected subgraph that fits within a token
|
|
21
|
+
budget**, ready to paste into a prompt or serve over MCP.
|
|
22
|
+
|
|
23
|
+
```bash
|
|
24
|
+
graphex index . # build a graph from your code (no LLM)
|
|
25
|
+
graphex "how does auth work" --budget 4000 # retrieve the apex subgraph
|
|
26
|
+
graphex serve # expose it to agents over MCP
|
|
27
|
+
```
|
|
28
|
+
|
|
29
|
+
Graphex reads the graphs produced by **graphify** and uses the rich signals
|
|
30
|
+
graphify emits — edge weights, confidence, hyperedges, communities, and god
|
|
31
|
+
nodes — that simpler tools throw away.
|
|
32
|
+
|
|
33
|
+
## Install
|
|
34
|
+
|
|
35
|
+
```bash
|
|
36
|
+
uv tool install apexgraph # or: pipx install apexgraph
|
|
37
|
+
# optional extras:
|
|
38
|
+
uv tool install "apexgraph[ts]" # better TypeScript indexing (tree-sitter)
|
|
39
|
+
uv tool install "apexgraph[dense]" # OpenAI/Anthropic embedding backend
|
|
40
|
+
```
|
|
41
|
+
|
|
42
|
+
The PyPI distribution is `apexgraph`; the command and import name are `graphex`.
|
|
43
|
+
Requires Python 3.12+.
|
|
44
|
+
|
|
45
|
+
## How it works
|
|
46
|
+
|
|
47
|
+
A five-stage pipeline, each stage a single-responsibility module:
|
|
48
|
+
|
|
49
|
+
```
|
|
50
|
+
load ─▶ score ─▶ select ─▶ inject ─▶ render
|
|
51
|
+
│ │ │ │ │
|
|
52
|
+
multi- BM25 → cost-aware source- markdown /
|
|
53
|
+
format PPR + MMR under code json / yaml
|
|
54
|
+
loader prior budget bodies
|
|
55
|
+
▲
|
|
56
|
+
index ───────────────┘ build a graph straight from code (no graphify)
|
|
57
|
+
```
|
|
58
|
+
|
|
59
|
+
**Relevance is one principled number, not a hand-tuned mix.** BM25 finds the
|
|
60
|
+
nodes the query is literally about; those seed a **Personalized PageRank** walk
|
|
61
|
+
that spreads relevance across the weighted graph (edge `weight × confidence`,
|
|
62
|
+
plus hyperedge cliques); a light importance/god-node prior nudges genuinely
|
|
63
|
+
central entities up. The query-independent half — global PageRank, the BM25
|
|
64
|
+
inverted index — is precomputed once and cached, invalidated by content hash, so
|
|
65
|
+
a query is just a lookup plus one walk.
|
|
66
|
+
|
|
67
|
+
**Selection is a budgeted knapsack, solved as one.** Picking the highest-value
|
|
68
|
+
set of nodes under a token ceiling is the 0/1 knapsack problem. Graphex selects
|
|
69
|
+
by *marginal value per token* and shapes the result with two terms — an MMR
|
|
70
|
+
penalty so it doesn't say the same thing twice, and a connectivity bonus so the
|
|
71
|
+
result is a coherent connected subgraph, not a bag of redundant islands. An exact
|
|
72
|
+
DP-knapsack mode is available for benchmarking the value ceiling.
|
|
73
|
+
|
|
74
|
+
**Token accounting is honest.** A node's cost is the size of its *final rendered
|
|
75
|
+
form*, including any injected source code — so `tokens_used` never lies and the
|
|
76
|
+
output never overflows the budget you asked for.
|
|
77
|
+
|
|
78
|
+
## Usage
|
|
79
|
+
|
|
80
|
+
```bash
|
|
81
|
+
# Index a project into a graphify-compatible graph.json (Python / TS / Go)
|
|
82
|
+
graphex index ./src -o graph.json
|
|
83
|
+
graphex index ./src --incremental # re-index only changed files
|
|
84
|
+
|
|
85
|
+
# Query (any unrecognised first arg routes here)
|
|
86
|
+
graphex "session token validation" -b 2000
|
|
87
|
+
graphex "auth flow" --explain # per-node BM25 / PPR / prior breakdown
|
|
88
|
+
graphex "auth flow" --inject-code # include real function bodies, still in budget
|
|
89
|
+
graphex "auth flow" --viz # interactive force-directed HTML
|
|
90
|
+
|
|
91
|
+
# Inspect (node ids come from your indexed graph; these match examples/)
|
|
92
|
+
graphex stats -g examples/sample_graph.json
|
|
93
|
+
graphex explain auth_service_login -g examples/sample_graph.json
|
|
94
|
+
graphex path auth_service auth_service_login -g examples/sample_graph.json
|
|
95
|
+
|
|
96
|
+
# Export a context block to paste into a system prompt / CLAUDE.md
|
|
97
|
+
graphex export "auth flow" -f claudemd -o CONTEXT.md
|
|
98
|
+
|
|
99
|
+
# Measure quality honestly (recall@budget, not just tokens saved)
|
|
100
|
+
graphex benchmark -q "auth flow" -q "db pooling" -b 1000 -b 4000
|
|
101
|
+
|
|
102
|
+
# Compare two graph versions and see the change impact
|
|
103
|
+
graphex diff old.json new.json --budget 2000
|
|
104
|
+
```
|
|
105
|
+
|
|
106
|
+
See [`examples/`](examples/) for a full walkthrough on a sample project.
|
|
107
|
+
|
|
108
|
+
## MCP server
|
|
109
|
+
|
|
110
|
+
Graphex speaks the Model Context Protocol over stdio (stdlib only, no SDK):
|
|
111
|
+
|
|
112
|
+
```bash
|
|
113
|
+
graphex serve --graph graph.json
|
|
114
|
+
```
|
|
115
|
+
|
|
116
|
+
It exposes four tools: `graphex_query`, `graphex_explain`, `graphex_path`,
|
|
117
|
+
`graphex_stats`. Register it with Claude Code:
|
|
118
|
+
|
|
119
|
+
```bash
|
|
120
|
+
claude mcp add graphex -- graphex serve --graph /abs/path/to/graph.json
|
|
121
|
+
```
|
|
122
|
+
|
|
123
|
+
## Honest benchmarking
|
|
124
|
+
|
|
125
|
+
"Tokens saved" is a vanity metric — a tool that returns nothing saves 100%.
|
|
126
|
+
Graphex reports **recall@budget** alongside it: how much of the relevant set the
|
|
127
|
+
budgeted subgraph actually captures. High savings with low recall means
|
|
128
|
+
under-retrieval, and the benchmark makes that trade-off visible.
|
|
129
|
+
|
|
130
|
+
## Development
|
|
131
|
+
|
|
132
|
+
```bash
|
|
133
|
+
uv sync
|
|
134
|
+
uv run pytest # test suite
|
|
135
|
+
uv run ruff check . # lint
|
|
136
|
+
uv run black . # format
|
|
137
|
+
```
|
|
138
|
+
|
|
139
|
+
## License
|
|
140
|
+
|
|
141
|
+
MIT © Alfonso Mayoral
|
|
@@ -0,0 +1,47 @@
|
|
|
1
|
+
# Releasing Graphex to PyPI
|
|
2
|
+
|
|
3
|
+
Graphex publishes via **Trusted Publishing** (OIDC) from GitHub Actions — no API
|
|
4
|
+
token is ever stored. The release workflow lives in
|
|
5
|
+
[`.github/workflows/publish.yml`](.github/workflows/publish.yml) and runs when a
|
|
6
|
+
GitHub Release is published. The PyPI distribution name is **`apexgraph`** (the
|
|
7
|
+
command and import name remain `graphex`).
|
|
8
|
+
|
|
9
|
+
## One-time setup (per maintainer)
|
|
10
|
+
|
|
11
|
+
1. **Create a PyPI account** at <https://pypi.org/account/register/> and enable
|
|
12
|
+
two-factor authentication (required).
|
|
13
|
+
2. **Add a pending trusted publisher** at
|
|
14
|
+
<https://pypi.org/manage/account/publishing/> with:
|
|
15
|
+
- PyPI Project Name: `apexgraph`
|
|
16
|
+
- Owner: `alfonsomayoral`
|
|
17
|
+
- Repository name: `graphex`
|
|
18
|
+
- Workflow name: `publish.yml`
|
|
19
|
+
- Environment name: `pypi`
|
|
20
|
+
|
|
21
|
+
This claims the project name and links it to this repo's workflow. (Optionally
|
|
22
|
+
do the same on <https://test.pypi.org> first for a dry run.)
|
|
23
|
+
|
|
24
|
+
## Cutting a release
|
|
25
|
+
|
|
26
|
+
1. Bump `version` in `pyproject.toml` and move the `CHANGELOG.md` entries from
|
|
27
|
+
`[Unreleased]` under the new version heading. Commit and push.
|
|
28
|
+
2. Tag and create the GitHub Release:
|
|
29
|
+
```bash
|
|
30
|
+
git tag v0.1.0
|
|
31
|
+
git push origin v0.1.0
|
|
32
|
+
gh release create v0.1.0 --title "v0.1.0" --notes-file <(sed -n '/## \[0.1.0\]/,/## \[/p' CHANGELOG.md)
|
|
33
|
+
```
|
|
34
|
+
(Or create the release from the GitHub UI.)
|
|
35
|
+
3. Publishing the release triggers `publish.yml`: it builds the sdist + wheel,
|
|
36
|
+
runs `twine check`, and uploads to PyPI via OIDC. Watch it with
|
|
37
|
+
`gh run watch`.
|
|
38
|
+
|
|
39
|
+
## Verify
|
|
40
|
+
|
|
41
|
+
```bash
|
|
42
|
+
uv tool install apexgraph
|
|
43
|
+
graphex --version
|
|
44
|
+
```
|
|
45
|
+
|
|
46
|
+
A version, once published, cannot be replaced — only yanked. Validate locally
|
|
47
|
+
first: `uv build && uvx twine check dist/*`.
|