apexgraph 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (73) hide show
  1. apexgraph-0.1.0/.gitattributes +3 -0
  2. apexgraph-0.1.0/.github/workflows/ci.yml +35 -0
  3. apexgraph-0.1.0/.github/workflows/publish.yml +49 -0
  4. apexgraph-0.1.0/.gitignore +34 -0
  5. apexgraph-0.1.0/CHANGELOG.md +42 -0
  6. apexgraph-0.1.0/CONTRIBUTING.md +37 -0
  7. apexgraph-0.1.0/LICENSE +21 -0
  8. apexgraph-0.1.0/PKG-INFO +175 -0
  9. apexgraph-0.1.0/README.md +141 -0
  10. apexgraph-0.1.0/RELEASING.md +47 -0
  11. apexgraph-0.1.0/examples/README.md +164 -0
  12. apexgraph-0.1.0/examples/sample_graph.json +1395 -0
  13. apexgraph-0.1.0/examples/sample_project/__init__.py +1 -0
  14. apexgraph-0.1.0/examples/sample_project/api/__init__.py +1 -0
  15. apexgraph-0.1.0/examples/sample_project/api/routes.py +100 -0
  16. apexgraph-0.1.0/examples/sample_project/api/server.py +62 -0
  17. apexgraph-0.1.0/examples/sample_project/auth/__init__.py +1 -0
  18. apexgraph-0.1.0/examples/sample_project/auth/service.py +77 -0
  19. apexgraph-0.1.0/examples/sample_project/auth/session.py +68 -0
  20. apexgraph-0.1.0/examples/sample_project/db/__init__.py +1 -0
  21. apexgraph-0.1.0/examples/sample_project/db/models.py +82 -0
  22. apexgraph-0.1.0/examples/sample_project/db/pool.py +99 -0
  23. apexgraph-0.1.0/graphex/__init__.py +8 -0
  24. apexgraph-0.1.0/graphex/audit.py +111 -0
  25. apexgraph-0.1.0/graphex/benchmark.py +297 -0
  26. apexgraph-0.1.0/graphex/budget.py +336 -0
  27. apexgraph-0.1.0/graphex/cache.py +116 -0
  28. apexgraph-0.1.0/graphex/cli.py +675 -0
  29. apexgraph-0.1.0/graphex/diff.py +176 -0
  30. apexgraph-0.1.0/graphex/exporter.py +119 -0
  31. apexgraph-0.1.0/graphex/formatter.py +287 -0
  32. apexgraph-0.1.0/graphex/ignore.py +87 -0
  33. apexgraph-0.1.0/graphex/indexer/__init__.py +6 -0
  34. apexgraph-0.1.0/graphex/indexer/go.py +139 -0
  35. apexgraph-0.1.0/graphex/indexer/project.py +266 -0
  36. apexgraph-0.1.0/graphex/indexer/python.py +190 -0
  37. apexgraph-0.1.0/graphex/indexer/typescript.py +267 -0
  38. apexgraph-0.1.0/graphex/injector.py +207 -0
  39. apexgraph-0.1.0/graphex/loader.py +670 -0
  40. apexgraph-0.1.0/graphex/mcp.py +463 -0
  41. apexgraph-0.1.0/graphex/models.py +290 -0
  42. apexgraph-0.1.0/graphex/retrieval/__init__.py +6 -0
  43. apexgraph-0.1.0/graphex/retrieval/base.py +31 -0
  44. apexgraph-0.1.0/graphex/retrieval/bm25.py +223 -0
  45. apexgraph-0.1.0/graphex/retrieval/dense.py +81 -0
  46. apexgraph-0.1.0/graphex/retrieval/fusion.py +71 -0
  47. apexgraph-0.1.0/graphex/retrieval/ppr.py +225 -0
  48. apexgraph-0.1.0/graphex/scorer.py +113 -0
  49. apexgraph-0.1.0/graphex/viz.py +321 -0
  50. apexgraph-0.1.0/pyproject.toml +76 -0
  51. apexgraph-0.1.0/tests/__init__.py +0 -0
  52. apexgraph-0.1.0/tests/test_audit.py +113 -0
  53. apexgraph-0.1.0/tests/test_benchmark.py +163 -0
  54. apexgraph-0.1.0/tests/test_bm25.py +177 -0
  55. apexgraph-0.1.0/tests/test_budget.py +137 -0
  56. apexgraph-0.1.0/tests/test_cache.py +63 -0
  57. apexgraph-0.1.0/tests/test_cli.py +137 -0
  58. apexgraph-0.1.0/tests/test_dense.py +42 -0
  59. apexgraph-0.1.0/tests/test_diff.py +100 -0
  60. apexgraph-0.1.0/tests/test_exporter.py +85 -0
  61. apexgraph-0.1.0/tests/test_formatter.py +147 -0
  62. apexgraph-0.1.0/tests/test_fusion.py +45 -0
  63. apexgraph-0.1.0/tests/test_ignore.py +99 -0
  64. apexgraph-0.1.0/tests/test_indexer.py +325 -0
  65. apexgraph-0.1.0/tests/test_injector.py +124 -0
  66. apexgraph-0.1.0/tests/test_loader.py +349 -0
  67. apexgraph-0.1.0/tests/test_mcp.py +249 -0
  68. apexgraph-0.1.0/tests/test_ppr.py +187 -0
  69. apexgraph-0.1.0/tests/test_review_fixes.py +146 -0
  70. apexgraph-0.1.0/tests/test_scorer.py +122 -0
  71. apexgraph-0.1.0/tests/test_security.py +62 -0
  72. apexgraph-0.1.0/tests/test_viz.py +162 -0
  73. apexgraph-0.1.0/uv.lock +923 -0
@@ -0,0 +1,3 @@
1
+ # Normalize line endings: text files are stored as LF in the repo and checked out with native endings, except *.py, which is always LF.
2
+ * text=auto
3
+ *.py text eol=lf
@@ -0,0 +1,35 @@
1
+ name: CI
2
+
3
+ on:
4
+ push:
5
+ branches: [main]
6
+ pull_request:
7
+ branches: [main]
8
+
9
+ jobs:
10
+ test:
11
+ runs-on: ubuntu-latest
12
+ strategy:
13
+ fail-fast: false
14
+ matrix:
15
+ python-version: ["3.12", "3.13"]
16
+
17
+ steps:
18
+ - uses: actions/checkout@v4
19
+
20
+ - name: Install uv
21
+ uses: astral-sh/setup-uv@v5
22
+ with:
23
+ python-version: ${{ matrix.python-version }}
24
+
25
+ - name: Install dependencies
26
+ run: uv sync
27
+
28
+ - name: Lint (ruff)
29
+ run: uv run ruff check .
30
+
31
+ - name: Format check (black)
32
+ run: uv run black --check .
33
+
34
+ - name: Tests (pytest)
35
+ run: uv run pytest -q
@@ -0,0 +1,49 @@
1
+ name: Publish to PyPI
2
+
3
+ # Publishes to PyPI when a GitHub Release is published, using Trusted Publishing
4
+ # (OIDC) — no API token is stored anywhere. The PyPI project (distribution name
5
+ # "apexgraph") must have a matching trusted publisher configured (repo:
6
+ # alfonsomayoral/graphex, workflow: publish.yml, environment: pypi). See RELEASING.md.
7
+
8
+ on:
9
+ release:
10
+ types: [published]
11
+
12
+ permissions:
13
+ contents: read
14
+
15
+ jobs:
16
+ build:
17
+ runs-on: ubuntu-latest
18
+ steps:
19
+ - uses: actions/checkout@v4
20
+ - name: Install uv
21
+ uses: astral-sh/setup-uv@v5
22
+ with:
23
+ python-version: "3.12"
24
+ - name: Build sdist and wheel
25
+ run: uv build
26
+ - name: Validate metadata
27
+ run: uvx twine check dist/*
28
+ - name: Upload build artifacts
29
+ uses: actions/upload-artifact@v4
30
+ with:
31
+ name: dist
32
+ path: dist/
33
+
34
+ publish:
35
+ needs: build
36
+ runs-on: ubuntu-latest
37
+ environment:
38
+ name: pypi
39
+ url: https://pypi.org/p/apexgraph
40
+ permissions:
41
+ id-token: write # required for Trusted Publishing (OIDC)
42
+ steps:
43
+ - name: Download build artifacts
44
+ uses: actions/download-artifact@v4
45
+ with:
46
+ name: dist
47
+ path: dist/
48
+ - name: Publish to PyPI
49
+ uses: pypa/gh-action-pypi-publish@release/v1
@@ -0,0 +1,34 @@
1
+ # Python
2
+ __pycache__/
3
+ *.py[cod]
4
+ *.egg-info/
5
+ .eggs/
6
+ build/
7
+ dist/
8
+ *.egg
9
+
10
+ # Virtual envs
11
+ .venv/
12
+ venv/
13
+ env/
14
+
15
+ # uv
16
+ .uv/
17
+
18
+ # Tooling caches
19
+ .pytest_cache/
20
+ .ruff_cache/
21
+ .mypy_cache/
22
+ .coverage
23
+ htmlcov/
24
+
25
+ # Graphex sidecar cache + runtime artifacts
26
+ .graphex/
27
+ graphify-out/
28
+
29
+ # OS / editor
30
+ .DS_Store
31
+ Thumbs.db
32
+ .idea/
33
+ .vscode/
34
+ *.swp
@@ -0,0 +1,42 @@
1
+ # Changelog
2
+
3
+ All notable changes to this project are documented here. The format is based on
4
+ [Keep a Changelog](https://keepachangelog.com/en/1.1.0/), and this project
5
+ adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
6
+
7
+ ## [0.1.0] - 2026-06-16
8
+
9
+ ### Added
10
+ - Multi-format graph loader (graphify JSON, GraphML, Neo4j CSV) preserving
11
+ hyperedges, edge weight/confidence, communities and god nodes.
12
+ - BM25 lexical retriever with a cached inverted index and identifier-aware
13
+ tokenizer (camelCase / snake_case / PascalCase, compounds preserved).
14
+ - Personalized PageRank / random-walk-with-restart over weighted edges and
15
+ hyperedge cliques, plus query-independent global PageRank.
16
+ - Scorer fusing BM25-seeded PPR with an importance/god-node prior.
17
+ - Cost-aware MMR subgraph selection with a connectivity bonus and honest token
18
+ accounting (including injected source code); optional exact DP-knapsack mode.
19
+ - On-disk cache (`.graphex/`) for global PageRank and the BM25 index, invalidated
20
+ by content fingerprint.
21
+ - Static indexer for Python (`ast`), TypeScript/JavaScript (tree-sitter with a
22
+ regex fallback) and Go (regex), with incremental re-indexing by file hash.
23
+ - Markdown / JSON / YAML formatter and source-code injector.
24
+ - MCP stdio server exposing `graphex_query`, `graphex_explain`, `graphex_path`
25
+ and `graphex_stats`.
26
+ - Click CLI: `query`, `index`, `serve`, `stats`, `explain`, `path`, `diff`,
27
+ `export`, `benchmark`, `audit`, `init` — with autodiscovery, rich `--explain`,
28
+ and UTF-8 output on Windows.
29
+ - Context export for Claude / ChatGPT / CLAUDE.md, graph diffing, `.graphexignore`
30
+ filtering, a JSONL query audit log, and interactive HTML visualisation.
31
+ - Optional dense-embedding backend (OpenAI / Anthropic) behind the `[dense]` extra.
32
+ - Benchmark reporting recall@budget alongside token savings.
33
+
34
+ ### Security
35
+ - Code injection (`--inject-code`) is contained to the project root: a crafted
36
+ `source_file` in an untrusted graph can no longer read arbitrary host files via
37
+ absolute paths or `..` traversal.
38
+ - The interactive visualisation loads vis-network from a pinned, immutable CDN
39
+ URL with a Subresource Integrity (SRI) hash, so a compromised CDN cannot inject
40
+ script into a generated page.
41
+
42
+ [0.1.0]: https://github.com/alfonsomayoral/graphex/releases/tag/v0.1.0
@@ -0,0 +1,37 @@
1
+ # Contributing to Graphex
2
+
3
+ Thanks for your interest in improving Graphex.
4
+
5
+ ## Setup
6
+
7
+ ```bash
8
+ git clone https://github.com/alfonsomayoral/graphex
9
+ cd graphex
10
+ uv sync
11
+ ```
12
+
13
+ ## Before you open a PR
14
+
15
+ ```bash
16
+ uv run ruff check . # lint (must pass)
17
+ uv run black --check . # format (must pass)
18
+ uv run pytest # tests (must pass)
19
+ ```
20
+
21
+ ## Architecture at a glance
22
+
23
+ Each module has a single responsibility; the data contract lives in
24
+ `graphex/models.py` (`KnowledgeGraph`, `Node`, `Edge`, `Hyperedge`). The scoring
25
+ pipeline is `retrieval/bm25.py` → `retrieval/ppr.py` → `retrieval/fusion.py` →
26
+ `scorer.py`; selection is `budget.py`; the user surface is `cli.py` and `mcp.py`.
27
+
28
+ When you add a module, add a matching `tests/test_<module>.py`. Keep public
29
+ functions typed and documented, and keep new dependencies out of the default
30
+ install path — put optional features behind an extra in `pyproject.toml`.
31
+
32
+ ## Guidelines
33
+
34
+ - Prefer reusing the helpers already in `models.py` and `retrieval/base.py`.
35
+ - The budget invariant is sacred: `tokens_used` must never exceed the requested
36
+ budget. Any selection change needs a test that proves it.
37
+ - Token-saving claims must be paired with a recall metric — see `benchmark.py`.
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 Alfonso Mayoral
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1,175 @@
1
+ Metadata-Version: 2.4
2
+ Name: apexgraph
3
+ Version: 0.1.0
4
+ Summary: Apex-relevance subgraph retrieval for AI agents. Feed your LLM the peak of your knowledge graph, sized to a token budget.
5
+ Project-URL: Homepage, https://github.com/alfonsomayoral/graphex
6
+ Project-URL: Repository, https://github.com/alfonsomayoral/graphex
7
+ Project-URL: Issues, https://github.com/alfonsomayoral/graphex/issues
8
+ Author-email: Alfonso Mayoral <alfonsomayoral29@gmail.com>
9
+ License: MIT
10
+ License-File: LICENSE
11
+ Keywords: ai-tools,bm25,cli,graphify,knowledge-graph,llm,mcp,pagerank,rag,token-budget
12
+ Classifier: Development Status :: 4 - Beta
13
+ Classifier: Intended Audience :: Developers
14
+ Classifier: License :: OSI Approved :: MIT License
15
+ Classifier: Programming Language :: Python :: 3.12
16
+ Classifier: Programming Language :: Python :: 3.13
17
+ Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
18
+ Classifier: Topic :: Software Development :: Documentation
19
+ Requires-Python: >=3.12
20
+ Requires-Dist: click>=8.1
21
+ Requires-Dist: networkx>=3.2
22
+ Requires-Dist: pathspec>=0.12
23
+ Requires-Dist: rich>=13.7
24
+ Requires-Dist: tiktoken>=0.7
25
+ Provides-Extra: dense
26
+ Requires-Dist: anthropic>=0.34; extra == 'dense'
27
+ Requires-Dist: openai>=1.40; extra == 'dense'
28
+ Provides-Extra: ts
29
+ Requires-Dist: tree-sitter-typescript>=0.23; extra == 'ts'
30
+ Requires-Dist: tree-sitter>=0.22; extra == 'ts'
31
+ Provides-Extra: viz
32
+ Requires-Dist: watchdog>=4.0; extra == 'viz'
33
+ Description-Content-Type: text/markdown
34
+
35
+ <div align="center">
36
+
37
+ # Graphex
38
+
39
+ **Apex-relevance subgraph retrieval for AI agents.**
40
+
41
+ Feed your LLM the *peak* of your knowledge graph — sized to a token budget.
42
+
43
+ [![CI](https://github.com/alfonsomayoral/graphex/actions/workflows/ci.yml/badge.svg)](https://github.com/alfonsomayoral/graphex/actions/workflows/ci.yml)
44
+ ![Python](https://img.shields.io/badge/python-3.12%2B-blue)
45
+ ![License](https://img.shields.io/badge/license-MIT-green)
46
+
47
+ </div>
48
+
49
+ ---
50
+
51
+ Knowledge graphs grow large. When an agent needs context about one corner of a
52
+ codebase, dumping the whole graph into the prompt wastes tokens and money — and
53
+ buries the relevant nodes in noise. **Graphex scores every node against your
54
+ query and returns the most relevant, connected subgraph that fits within a token
55
+ budget**, ready to paste into a prompt or serve over MCP.
56
+
57
+ ```bash
58
+ graphex index . # build a graph from your code (no LLM)
59
+ graphex "how does auth work" --budget 4000 # retrieve the apex subgraph
60
+ graphex serve # expose it to agents over MCP
61
+ ```
62
+
63
+ Graphex reads the graphs produced by **graphify** and uses the rich signals
64
+ graphify emits — edge weights, confidence, hyperedges, communities, and god
65
+ nodes — that simpler tools throw away.
66
+
67
+ ## Install
68
+
69
+ ```bash
70
+ uv tool install apexgraph # or: pipx install apexgraph
71
+ # optional extras:
72
+ uv tool install "apexgraph[ts]" # better TypeScript indexing (tree-sitter)
73
+ uv tool install "apexgraph[dense]" # OpenAI/Anthropic embedding backend
74
+ ```
75
+
76
+ The PyPI distribution is `apexgraph`; the command and import name are `graphex`.
77
+ Requires Python 3.12+.
78
+
79
+ ## How it works
80
+
81
+ A five-stage pipeline, each stage a single-responsibility module:
82
+
83
+ ```
84
+ load ─▶ score ─▶ select ─▶ inject ─▶ render
85
+ │ │ │ │ │
86
+ multi- BM25 → cost-aware source- markdown /
87
+ format PPR + MMR under code json / yaml
88
+ loader prior budget bodies
89
+
90
+ index ───────────────┘ build a graph straight from code (no graphify)
91
+ ```
92
+
93
+ **Relevance is one principled number, not a hand-tuned mix.** BM25 finds the
94
+ nodes the query is literally about; those seed a **Personalized PageRank** walk
95
+ that spreads relevance across the weighted graph (edge `weight × confidence`,
96
+ plus hyperedge cliques); a light importance/god-node prior nudges genuinely
97
+ central entities up. The query-independent half — global PageRank, the BM25
98
+ inverted index — is precomputed once and cached, invalidated by content hash, so
99
+ a query is just a lookup plus one walk.
100
+
101
+ **Selection is a budgeted knapsack, solved as one.** Picking the highest-value
102
+ set of nodes under a token ceiling is the 0/1 knapsack problem. Graphex selects
103
+ by *marginal value per token* and shapes the result with two terms — an MMR
104
+ penalty so it doesn't say the same thing twice, and a connectivity bonus so the
105
+ result is a coherent connected subgraph, not a bag of redundant islands. An exact
106
+ DP-knapsack mode is available for benchmarking the value ceiling.
107
+
108
+ **Token accounting is honest.** A node's cost is the size of its *final rendered
109
+ form*, including any injected source code — so `tokens_used` never lies and the
110
+ output never overflows the budget you asked for.
111
+
112
+ ## Usage
113
+
114
+ ```bash
115
+ # Index a project into a graphify-compatible graph.json (Python / TS / Go)
116
+ graphex index ./src -o graph.json
117
+ graphex index ./src --incremental # re-index only changed files
118
+
119
+ # Query (any unrecognised first arg routes here)
120
+ graphex "session token validation" -b 2000
121
+ graphex "auth flow" --explain # per-node BM25 / PPR / prior breakdown
122
+ graphex "auth flow" --inject-code # include real function bodies, still in budget
123
+ graphex "auth flow" --viz # interactive force-directed HTML
124
+
125
+ # Inspect (node ids come from your indexed graph; these match examples/)
126
+ graphex stats -g examples/sample_graph.json
127
+ graphex explain auth_service_login -g examples/sample_graph.json
128
+ graphex path auth_service auth_service_login -g examples/sample_graph.json
129
+
130
+ # Export a context block to paste into a system prompt / CLAUDE.md
131
+ graphex export "auth flow" -f claudemd -o CONTEXT.md
132
+
133
+ # Measure quality honestly (recall@budget, not just tokens saved)
134
+ graphex benchmark -q "auth flow" -q "db pooling" -b 1000 -b 4000
135
+
136
+ # Compare two graph versions and see the change impact
137
+ graphex diff old.json new.json --budget 2000
138
+ ```
139
+
140
+ See [`examples/`](examples/) for a full walkthrough on a sample project.
141
+
142
+ ## MCP server
143
+
144
+ Graphex speaks the Model Context Protocol over stdio (stdlib only, no SDK):
145
+
146
+ ```bash
147
+ graphex serve --graph graph.json
148
+ ```
149
+
150
+ It exposes four tools: `graphex_query`, `graphex_explain`, `graphex_path`,
151
+ `graphex_stats`. Register it with Claude Code:
152
+
153
+ ```bash
154
+ claude mcp add graphex -- graphex serve --graph /abs/path/to/graph.json
155
+ ```
156
+
157
+ ## Honest benchmarking
158
+
159
+ "Tokens saved" is a vanity metric — a tool that returns nothing saves 100%.
160
+ Graphex reports **recall@budget** alongside it: how much of the relevant set the
161
+ budgeted subgraph actually captures. High savings with low recall means
162
+ under-retrieval, and the benchmark makes that trade-off visible.
163
+
164
+ ## Development
165
+
166
+ ```bash
167
+ uv sync
168
+ uv run pytest # test suite
169
+ uv run ruff check . # lint
170
+ uv run black . # format
171
+ ```
172
+
173
+ ## License
174
+
175
+ MIT © Alfonso Mayoral
@@ -0,0 +1,141 @@
1
+ <div align="center">
2
+
3
+ # Graphex
4
+
5
+ **Apex-relevance subgraph retrieval for AI agents.**
6
+
7
+ Feed your LLM the *peak* of your knowledge graph — sized to a token budget.
8
+
9
+ [![CI](https://github.com/alfonsomayoral/graphex/actions/workflows/ci.yml/badge.svg)](https://github.com/alfonsomayoral/graphex/actions/workflows/ci.yml)
10
+ ![Python](https://img.shields.io/badge/python-3.12%2B-blue)
11
+ ![License](https://img.shields.io/badge/license-MIT-green)
12
+
13
+ </div>
14
+
15
+ ---
16
+
17
+ Knowledge graphs grow large. When an agent needs context about one corner of a
18
+ codebase, dumping the whole graph into the prompt wastes tokens and money — and
19
+ buries the relevant nodes in noise. **Graphex scores every node against your
20
+ query and returns the most relevant, connected subgraph that fits within a token
21
+ budget**, ready to paste into a prompt or serve over MCP.
22
+
23
+ ```bash
24
+ graphex index . # build a graph from your code (no LLM)
25
+ graphex "how does auth work" --budget 4000 # retrieve the apex subgraph
26
+ graphex serve # expose it to agents over MCP
27
+ ```
28
+
29
+ Graphex reads the graphs produced by **graphify** and uses the rich signals
30
+ graphify emits — edge weights, confidence, hyperedges, communities, and god
31
+ nodes — that simpler tools throw away.
32
+
33
+ ## Install
34
+
35
+ ```bash
36
+ uv tool install apexgraph # or: pipx install apexgraph
37
+ # optional extras:
38
+ uv tool install "apexgraph[ts]" # better TypeScript indexing (tree-sitter)
39
+ uv tool install "apexgraph[dense]" # OpenAI/Anthropic embedding backend
40
+ ```
41
+
42
+ The PyPI distribution is `apexgraph`; the command and import name are `graphex`.
43
+ Requires Python 3.12+.
44
+
45
+ ## How it works
46
+
47
+ A five-stage pipeline, each stage a single-responsibility module:
48
+
49
+ ```
50
+ load ─▶ score ─▶ select ─▶ inject ─▶ render
51
+ │ │ │ │ │
52
+ multi- BM25 → cost-aware source- markdown /
53
+ format PPR + MMR under code json / yaml
54
+ loader prior budget bodies
55
+
56
+ index ───────────────┘ build a graph straight from code (no graphify)
57
+ ```
58
+
59
+ **Relevance is one principled number, not a hand-tuned mix.** BM25 finds the
60
+ nodes the query is literally about; those seed a **Personalized PageRank** walk
61
+ that spreads relevance across the weighted graph (edge `weight × confidence`,
62
+ plus hyperedge cliques); a light importance/god-node prior nudges genuinely
63
+ central entities up. The query-independent half — global PageRank, the BM25
64
+ inverted index — is precomputed once and cached, invalidated by content hash, so
65
+ a query is just a lookup plus one walk.
66
+
67
+ **Selection is a budgeted knapsack, solved as one.** Picking the highest-value
68
+ set of nodes under a token ceiling is the 0/1 knapsack problem. Graphex selects
69
+ by *marginal value per token* and shapes the result with two terms — an MMR
70
+ penalty so it doesn't say the same thing twice, and a connectivity bonus so the
71
+ result is a coherent connected subgraph, not a bag of redundant islands. An exact
72
+ DP-knapsack mode is available for benchmarking the value ceiling.
73
+
74
+ **Token accounting is honest.** A node's cost is the size of its *final rendered
75
+ form*, including any injected source code — so `tokens_used` never lies and the
76
+ output never overflows the budget you asked for.
77
+
78
+ ## Usage
79
+
80
+ ```bash
81
+ # Index a project into a graphify-compatible graph.json (Python / TS / Go)
82
+ graphex index ./src -o graph.json
83
+ graphex index ./src --incremental # re-index only changed files
84
+
85
+ # Query (any unrecognised first arg routes here)
86
+ graphex "session token validation" -b 2000
87
+ graphex "auth flow" --explain # per-node BM25 / PPR / prior breakdown
88
+ graphex "auth flow" --inject-code # include real function bodies, still in budget
89
+ graphex "auth flow" --viz # interactive force-directed HTML
90
+
91
+ # Inspect (node ids come from your indexed graph; these match examples/)
92
+ graphex stats -g examples/sample_graph.json
93
+ graphex explain auth_service_login -g examples/sample_graph.json
94
+ graphex path auth_service auth_service_login -g examples/sample_graph.json
95
+
96
+ # Export a context block to paste into a system prompt / CLAUDE.md
97
+ graphex export "auth flow" -f claudemd -o CONTEXT.md
98
+
99
+ # Measure quality honestly (recall@budget, not just tokens saved)
100
+ graphex benchmark -q "auth flow" -q "db pooling" -b 1000 -b 4000
101
+
102
+ # Compare two graph versions and see the change impact
103
+ graphex diff old.json new.json --budget 2000
104
+ ```
105
+
106
+ See [`examples/`](examples/) for a full walkthrough on a sample project.
107
+
108
+ ## MCP server
109
+
110
+ Graphex speaks the Model Context Protocol over stdio (stdlib only, no SDK):
111
+
112
+ ```bash
113
+ graphex serve --graph graph.json
114
+ ```
115
+
116
+ It exposes four tools: `graphex_query`, `graphex_explain`, `graphex_path`,
117
+ `graphex_stats`. Register it with Claude Code:
118
+
119
+ ```bash
120
+ claude mcp add graphex -- graphex serve --graph /abs/path/to/graph.json
121
+ ```
122
+
123
+ ## Honest benchmarking
124
+
125
+ "Tokens saved" is a vanity metric — a tool that returns nothing saves 100%.
126
+ Graphex reports **recall@budget** alongside it: how much of the relevant set the
127
+ budgeted subgraph actually captures. High savings with low recall means
128
+ under-retrieval, and the benchmark makes that trade-off visible.
129
+
130
+ ## Development
131
+
132
+ ```bash
133
+ uv sync
134
+ uv run pytest # test suite
135
+ uv run ruff check . # lint
136
+ uv run black . # format
137
+ ```
138
+
139
+ ## License
140
+
141
+ MIT © Alfonso Mayoral
@@ -0,0 +1,47 @@
1
+ # Releasing Graphex to PyPI
2
+
3
+ Graphex publishes via **Trusted Publishing** (OIDC) from GitHub Actions — no API
4
+ token is ever stored. The release workflow lives in
5
+ [`.github/workflows/publish.yml`](.github/workflows/publish.yml) and runs when a
6
+ GitHub Release is published. The PyPI distribution name is **`apexgraph`** (the
7
+ command and import name remain `graphex`).
8
+
9
+ ## One-time setup (per maintainer)
10
+
11
+ 1. **Create a PyPI account** at <https://pypi.org/account/register/> and enable
12
+ two-factor authentication (required).
13
+ 2. **Add a pending trusted publisher** at
14
+ <https://pypi.org/manage/account/publishing/> with:
15
+ - PyPI Project Name: `apexgraph`
16
+ - Owner: `alfonsomayoral`
17
+ - Repository name: `graphex`
18
+ - Workflow name: `publish.yml`
19
+ - Environment name: `pypi`
20
+
21
+ This links the project name to this repo's workflow; the name itself is not reserved until the first release is actually published. (Optionally
22
+ do the same on <https://test.pypi.org> first for a dry run.)
23
+
24
+ ## Cutting a release
25
+
26
+ 1. Bump `version` in `pyproject.toml` and add a dated heading for the new version to
27
+ `CHANGELOG.md`, moving any entries collected under `[Unreleased]` beneath it. Commit and push.
28
+ 2. Tag and create the GitHub Release:
29
+ ```bash
30
+ git tag v0.1.0
31
+ git push origin v0.1.0
32
+ gh release create v0.1.0 --title "v0.1.0" --notes-file <(sed -n '/## \[0.1.0\]/,/## \[/p' CHANGELOG.md)
33
+ ```
34
+ (Or create the release from the GitHub UI.)
35
+ 3. Publishing the release triggers `publish.yml`: it builds the sdist + wheel,
36
+ runs `twine check`, and uploads to PyPI via OIDC. Watch it with
37
+ `gh run watch`.
38
+
39
+ ## Verify
40
+
41
+ ```bash
42
+ uv tool install apexgraph
43
+ graphex --version
44
+ ```
45
+
46
+ A version, once published, cannot be replaced — only yanked. Validate locally
47
+ first: `uv build && uvx twine check dist/*`.