archgraph 0.2.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- archgraph-0.2.0/.claude/settings.local.json +16 -0
- archgraph-0.2.0/.dockerignore +12 -0
- archgraph-0.2.0/.github/REPO_SETTINGS.md +13 -0
- archgraph-0.2.0/.gitignore +48 -0
- archgraph-0.2.0/CLAUDE.md +66 -0
- archgraph-0.2.0/Dockerfile +29 -0
- archgraph-0.2.0/LICENSE +21 -0
- archgraph-0.2.0/PKG-INFO +282 -0
- archgraph-0.2.0/README.md +231 -0
- archgraph-0.2.0/archgraph/__init__.py +3 -0
- archgraph-0.2.0/archgraph/cli.py +740 -0
- archgraph-0.2.0/archgraph/config.py +202 -0
- archgraph-0.2.0/archgraph/enrichment/__init__.py +8 -0
- archgraph-0.2.0/archgraph/enrichment/churn.py +66 -0
- archgraph-0.2.0/archgraph/enrichment/clustering.py +115 -0
- archgraph-0.2.0/archgraph/enrichment/cve.py +127 -0
- archgraph-0.2.0/archgraph/enrichment/process.py +196 -0
- archgraph-0.2.0/archgraph/export.py +95 -0
- archgraph-0.2.0/archgraph/extractors/__init__.py +17 -0
- archgraph-0.2.0/archgraph/extractors/annotations.py +69 -0
- archgraph-0.2.0/archgraph/extractors/base.py +25 -0
- archgraph-0.2.0/archgraph/extractors/clang.py +975 -0
- archgraph-0.2.0/archgraph/extractors/deep/__init__.py +202 -0
- archgraph-0.2.0/archgraph/extractors/deep/engine.py +939 -0
- archgraph-0.2.0/archgraph/extractors/deep/go.py +71 -0
- archgraph-0.2.0/archgraph/extractors/deep/java.py +65 -0
- archgraph-0.2.0/archgraph/extractors/deep/kotlin.py +61 -0
- archgraph-0.2.0/archgraph/extractors/deep/lang_spec.py +52 -0
- archgraph-0.2.0/archgraph/extractors/deep/rust.py +80 -0
- archgraph-0.2.0/archgraph/extractors/deep/swift.py +74 -0
- archgraph-0.2.0/archgraph/extractors/dependencies.py +230 -0
- archgraph-0.2.0/archgraph/extractors/git.py +216 -0
- archgraph-0.2.0/archgraph/extractors/security_labels.py +108 -0
- archgraph-0.2.0/archgraph/extractors/treesitter.py +921 -0
- archgraph-0.2.0/archgraph/graph/__init__.py +17 -0
- archgraph-0.2.0/archgraph/graph/builder.py +572 -0
- archgraph-0.2.0/archgraph/graph/neo4j_store.py +415 -0
- archgraph-0.2.0/archgraph/graph/schema.py +264 -0
- archgraph-0.2.0/archgraph/manifest.py +260 -0
- archgraph-0.2.0/archgraph/mcp/__init__.py +5 -0
- archgraph-0.2.0/archgraph/mcp/server.py +454 -0
- archgraph-0.2.0/archgraph/registry.py +137 -0
- archgraph-0.2.0/archgraph/report.py +205 -0
- archgraph-0.2.0/archgraph/search.py +259 -0
- archgraph-0.2.0/archgraph/server/__init__.py +5 -0
- archgraph-0.2.0/archgraph/server/web.py +410 -0
- archgraph-0.2.0/archgraph/skills.py +320 -0
- archgraph-0.2.0/archgraph/tool/__init__.py +1 -0
- archgraph-0.2.0/archgraph/tool/archgraph_tool.py +225 -0
- archgraph-0.2.0/archgraph/tool/impact.py +202 -0
- archgraph-0.2.0/assets/banner.svg +103 -0
- archgraph-0.2.0/assets/logo.svg +72 -0
- archgraph-0.2.0/docker-compose.yml +29 -0
- archgraph-0.2.0/docs/AGENT.md +302 -0
- archgraph-0.2.0/docs/ARCHITECTURE.md +258 -0
- archgraph-0.2.0/docs/CLI.md +343 -0
- archgraph-0.2.0/docs/DEEP_ANALYSIS.md +112 -0
- archgraph-0.2.0/docs/ROADMAP.md +81 -0
- archgraph-0.2.0/docs/SECURITY.md +162 -0
- archgraph-0.2.0/pyproject.toml +66 -0
- archgraph-0.2.0/tests/__init__.py +0 -0
- archgraph-0.2.0/tests/test_builder.py +419 -0
- archgraph-0.2.0/tests/test_clang.py +367 -0
- archgraph-0.2.0/tests/test_cve.py +167 -0
- archgraph-0.2.0/tests/test_deep.py +661 -0
- archgraph-0.2.0/tests/test_extractors.py +341 -0
- archgraph-0.2.0/tests/test_gitnexus_features.py +255 -0
- archgraph-0.2.0/tests/test_manifest.py +232 -0
- archgraph-0.2.0/tests/test_neo4j_mock.py +53 -0
- archgraph-0.2.0/tests/test_new_features.py +198 -0
- archgraph-0.2.0/tests/test_schema.py +184 -0
- archgraph-0.2.0/tests/test_tool.py +17 -0
- archgraph-0.2.0/tests/test_treesitter.py +253 -0
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
{
|
|
2
|
+
"permissions": {
|
|
3
|
+
"allow": [
|
|
4
|
+
"Bash(source:*)",
|
|
5
|
+
"Bash(python -m pytest:*)",
|
|
6
|
+
"Bash(archgraph --help:*)",
|
|
7
|
+
"WebSearch",
|
|
8
|
+
"WebFetch(domain:libclang.readthedocs.io)",
|
|
9
|
+
"WebFetch(domain:eli.thegreenplace.net)",
|
|
10
|
+
"Bash(wc -l:*)",
|
|
11
|
+
"Bash(pytest:*)",
|
|
12
|
+
"Bash(git add:*)",
|
|
13
|
+
"Bash(git commit:*)"
|
|
14
|
+
]
|
|
15
|
+
}
|
|
16
|
+
}
|
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
# GitHub Repository Settings
|
|
2
|
+
|
|
3
|
+
Apply these settings manually via the GitHub web UI (Settings > General):
|
|
4
|
+
|
|
5
|
+
## About
|
|
6
|
+
|
|
7
|
+
**Description:**
|
|
8
|
+
Source code knowledge graph extraction tool. Parses 10 languages via tree-sitter, builds Neo4j graph with CFG, data flow, taint analysis, and CVE enrichment.
|
|
9
|
+
|
|
10
|
+
**Website:** *(leave empty or add docs link)*
|
|
11
|
+
|
|
12
|
+
**Topics:**
|
|
13
|
+
`reverse-engineering` `static-analysis` `neo4j` `tree-sitter` `code-analysis` `security` `taint-analysis` `control-flow-graph` `cypher` `python`
|
|
@@ -0,0 +1,48 @@
|
|
|
1
|
+
# Python
|
|
2
|
+
__pycache__/
|
|
3
|
+
*.py[cod]
|
|
4
|
+
*$py.class
|
|
5
|
+
*.egg-info/
|
|
6
|
+
*.egg
|
|
7
|
+
dist/
|
|
8
|
+
build/
|
|
9
|
+
.eggs/
|
|
10
|
+
|
|
11
|
+
# Virtual environments
|
|
12
|
+
.venv/
|
|
13
|
+
venv/
|
|
14
|
+
ENV/
|
|
15
|
+
|
|
16
|
+
# IDE
|
|
17
|
+
.idea/
|
|
18
|
+
.vscode/
|
|
19
|
+
*.swp
|
|
20
|
+
*.swo
|
|
21
|
+
*~
|
|
22
|
+
|
|
23
|
+
# Testing
|
|
24
|
+
.pytest_cache/
|
|
25
|
+
.coverage
|
|
26
|
+
htmlcov/
|
|
27
|
+
|
|
28
|
+
# OS
|
|
29
|
+
.DS_Store
|
|
30
|
+
Thumbs.db
|
|
31
|
+
|
|
32
|
+
# Ruff
|
|
33
|
+
.ruff_cache/
|
|
34
|
+
|
|
35
|
+
# Neo4j data
|
|
36
|
+
neo4j_data/
|
|
37
|
+
|
|
38
|
+
# Tree-sitter compiled grammars
|
|
39
|
+
grammars/
|
|
40
|
+
|
|
41
|
+
# rlm internal docs (not part of archgraph)
|
|
42
|
+
rlm-perspectives.md
|
|
43
|
+
rlm_insights.md
|
|
44
|
+
tools.md
|
|
45
|
+
|
|
46
|
+
# Accidental pip output
|
|
47
|
+
=18.1.0
|
|
48
|
+
.bg-shell/
|
|
@@ -0,0 +1,66 @@
|
|
|
1
|
+
# ArchGraph
|
|
2
|
+
|
|
3
|
+
Kaynak kod graph extraction & Cypher query tool for reverse engineering.
|
|
4
|
+
|
|
5
|
+
## Geliştirme Ortamı
|
|
6
|
+
|
|
7
|
+
```bash
|
|
8
|
+
python3 -m venv .venv
|
|
9
|
+
source .venv/bin/activate
|
|
10
|
+
pip install -e ".[dev]"
|
|
11
|
+
```
|
|
12
|
+
|
|
13
|
+
## Testler
|
|
14
|
+
|
|
15
|
+
```bash
|
|
16
|
+
# Tüm testler (159 test — 137 passed, 22 skipped)
|
|
17
|
+
pytest tests/ -v
|
|
18
|
+
```
|
|
19
|
+
|
|
20
|
+
Test dosyaları Neo4j gerektirmez. Tree-sitter ve git testleri `tmp_path` fixture ile gerçek parse/commit yapar.
|
|
21
|
+
|
|
22
|
+
## Kod Kuralları
|
|
23
|
+
|
|
24
|
+
- Python 3.11+, type hint zorunlu
|
|
25
|
+
- Ruff formatter, line-length 100
|
|
26
|
+
- Her extractor `BaseExtractor`'dan türer, `extract(repo_path, **kwargs) -> GraphData` döner
|
|
27
|
+
- Extractor'lara `workers` kwarg ile parallelism geçirilir (thread-local parser)
|
|
28
|
+
- Node ID formatı: `{tip}:{yol}:{isim}:{satır}` (ör. `func:src/main.c:parse_data:42`)
|
|
29
|
+
- Edge'ler `(source_id, target_id, type)` tuple ile unique
|
|
30
|
+
- `GraphData.deduplicate()` pipeline sonunda çağrılır
|
|
31
|
+
- `GraphData.merge()` tek thread'de çağrılmalı (futures.result() sonrası)
|
|
32
|
+
- Security label'lar `config.py`'deki frozenset'lerle eşleşir
|
|
33
|
+
- Neo4j importta `_Node` label'ı tüm node'lara eklenir (cross-label query için)
|
|
34
|
+
- Neo4j'de `_id` property unique constraint taşır
|
|
35
|
+
|
|
36
|
+
## Thread Safety
|
|
37
|
+
|
|
38
|
+
- `ts.Language` objeleri thread-safe → paylaşılabilir
|
|
39
|
+
- `ts.Parser` objeleri thread-safe DEĞİL → `threading.local()` ile thread-başına instance
|
|
40
|
+
- libclang `Index` thread-safe değil → her thread kendi `Index.create()`
|
|
41
|
+
- Pipeline merge işlemleri ana thread'de yapılır
|
|
42
|
+
|
|
43
|
+
## Kilit Dosyalar
|
|
44
|
+
|
|
45
|
+
| Dosya | Açıklama |
|
|
46
|
+
|-------|----------|
|
|
47
|
+
| `archgraph/config.py` | Tüm sabitler, güvenlik pattern'leri, `ExtractConfig` dataclass |
|
|
48
|
+
| `archgraph/graph/builder.py` | 11-adım pipeline orkestrasyon (parallel/sequential) |
|
|
49
|
+
| `archgraph/graph/schema.py` | `Node`/`Edge` dataclass, `NodeLabel`/`EdgeType` sabitleri |
|
|
50
|
+
| `archgraph/graph/neo4j_store.py` | Neo4j batch import, `_Node` label, `_id` unique |
|
|
51
|
+
| `archgraph/extractors/treesitter.py` | Ana extractor, 10 dil, thread-local parser |
|
|
52
|
+
| `archgraph/extractors/clang.py` | libclang deep analysis (CFG, data flow, taint) |
|
|
53
|
+
| `archgraph/extractors/deep/` | Tree-sitter deep analysis (Rust, Java, Go, Kotlin, Swift) |
|
|
54
|
+
| `archgraph/enrichment/cve.py` | CVE enrichment — OSV API batch query |
|
|
55
|
+
| `archgraph/tool/archgraph_tool.py` | rlm-agent tool (standalone, BaseTool bağımlılığı yok) |
|
|
56
|
+
|
|
57
|
+
## Dokümantasyon
|
|
58
|
+
|
|
59
|
+
Detaylı dokümantasyon `docs/` altında:
|
|
60
|
+
|
|
61
|
+
- `docs/ARCHITECTURE.md` — Proje yapısı, pipeline, graph schema, node/edge tipleri
|
|
62
|
+
- `docs/CLI.md` — Tüm komutlar ve option'lar
|
|
63
|
+
- `docs/DEEP_ANALYSIS.md` — CFG, data flow, taint, dil-bazlı pattern'ler
|
|
64
|
+
- `docs/SECURITY.md` — Security labeling, CVE enrichment, örnek sorgular
|
|
65
|
+
- `docs/AGENT.md` — rlm-agent entegrasyonu, API referansı
|
|
66
|
+
- `docs/ROADMAP.md` — Faz 1-4 durumu
|
|
@@ -0,0 +1,29 @@
|
|
|
1
|
+
# --- Stage 1: Builder ---
|
|
2
|
+
FROM python:3.11-slim AS builder
|
|
3
|
+
|
|
4
|
+
RUN apt-get update && apt-get install -y --no-install-recommends \
|
|
5
|
+
gcc g++ git \
|
|
6
|
+
&& rm -rf /var/lib/apt/lists/*
|
|
7
|
+
|
|
8
|
+
WORKDIR /build
|
|
9
|
+
COPY . .
|
|
10
|
+
|
|
11
|
+
ARG INSTALL_EXTRAS=""
|
|
12
|
+
RUN if [ -z "$INSTALL_EXTRAS" ]; then \
|
|
13
|
+
pip install --no-cache-dir .; \
|
|
14
|
+
else \
|
|
15
|
+
pip install --no-cache-dir ".[$INSTALL_EXTRAS]"; \
|
|
16
|
+
fi
|
|
17
|
+
|
|
18
|
+
# --- Stage 2: Runtime ---
|
|
19
|
+
FROM python:3.11-slim
|
|
20
|
+
|
|
21
|
+
RUN apt-get update && apt-get install -y --no-install-recommends \
|
|
22
|
+
git \
|
|
23
|
+
&& rm -rf /var/lib/apt/lists/*
|
|
24
|
+
|
|
25
|
+
COPY --from=builder /usr/local/lib/python3.11/site-packages /usr/local/lib/python3.11/site-packages
|
|
26
|
+
COPY --from=builder /usr/local/bin/archgraph /usr/local/bin/archgraph
|
|
27
|
+
|
|
28
|
+
WORKDIR /data
|
|
29
|
+
ENTRYPOINT ["archgraph"]
|
archgraph-0.2.0/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2024-2026 ArchGraph Contributors
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
archgraph-0.2.0/PKG-INFO
ADDED
|
@@ -0,0 +1,282 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: archgraph
|
|
3
|
+
Version: 0.2.0
|
|
4
|
+
Summary: Security-first code intelligence for AI agents — taint analysis, CVE detection, MCP integration
|
|
5
|
+
Author: ArchGraph Contributors
|
|
6
|
+
License-Expression: MIT
|
|
7
|
+
License-File: LICENSE
|
|
8
|
+
Keywords: ai-agents,code-analysis,knowledge-graph,mcp,neo4j,security,static-analysis,taint-analysis
|
|
9
|
+
Classifier: Development Status :: 4 - Beta
|
|
10
|
+
Classifier: Intended Audience :: Developers
|
|
11
|
+
Classifier: Topic :: Security
|
|
12
|
+
Classifier: Topic :: Software Development :: Quality Assurance
|
|
13
|
+
Requires-Python: >=3.11
|
|
14
|
+
Requires-Dist: click>=8.1.0
|
|
15
|
+
Requires-Dist: fastapi>=0.110.0
|
|
16
|
+
Requires-Dist: jinja2>=3.1.0
|
|
17
|
+
Requires-Dist: mcp>=1.0.0
|
|
18
|
+
Requires-Dist: neo4j>=5.0.0
|
|
19
|
+
Requires-Dist: networkx>=3.0
|
|
20
|
+
Requires-Dist: pyyaml>=6.0
|
|
21
|
+
Requires-Dist: rich>=13.0.0
|
|
22
|
+
Requires-Dist: scipy>=1.11.0
|
|
23
|
+
Requires-Dist: toml>=0.10.0
|
|
24
|
+
Requires-Dist: tree-sitter-c>=0.23.0
|
|
25
|
+
Requires-Dist: tree-sitter-cpp>=0.23.0
|
|
26
|
+
Requires-Dist: tree-sitter-go>=0.23.0
|
|
27
|
+
Requires-Dist: tree-sitter-java>=0.23.0
|
|
28
|
+
Requires-Dist: tree-sitter-javascript>=0.23.0
|
|
29
|
+
Requires-Dist: tree-sitter-rust>=0.23.0
|
|
30
|
+
Requires-Dist: tree-sitter-typescript>=0.23.0
|
|
31
|
+
Requires-Dist: tree-sitter>=0.24.0
|
|
32
|
+
Requires-Dist: uvicorn>=0.29.0
|
|
33
|
+
Provides-Extra: all
|
|
34
|
+
Requires-Dist: libclang>=18.1.0; extra == 'all'
|
|
35
|
+
Requires-Dist: tree-sitter-kotlin>=0.23.0; extra == 'all'
|
|
36
|
+
Requires-Dist: tree-sitter-objc>=3.0.0; extra == 'all'
|
|
37
|
+
Requires-Dist: tree-sitter-swift>=0.6.0; extra == 'all'
|
|
38
|
+
Provides-Extra: clang
|
|
39
|
+
Requires-Dist: libclang>=18.1.0; extra == 'clang'
|
|
40
|
+
Provides-Extra: dev
|
|
41
|
+
Requires-Dist: pytest-asyncio>=0.23.0; extra == 'dev'
|
|
42
|
+
Requires-Dist: pytest>=8.0.0; extra == 'dev'
|
|
43
|
+
Requires-Dist: ruff>=0.3.0; extra == 'dev'
|
|
44
|
+
Provides-Extra: kotlin
|
|
45
|
+
Requires-Dist: tree-sitter-kotlin>=0.23.0; extra == 'kotlin'
|
|
46
|
+
Provides-Extra: objc
|
|
47
|
+
Requires-Dist: tree-sitter-objc>=3.0.0; extra == 'objc'
|
|
48
|
+
Provides-Extra: swift
|
|
49
|
+
Requires-Dist: tree-sitter-swift>=0.6.0; extra == 'swift'
|
|
50
|
+
Description-Content-Type: text/markdown
|
|
51
|
+
|
|
52
|
+
<p align="center">
|
|
53
|
+
<img src="assets/banner.svg" alt="ArchGraph" width="700"/>
|
|
54
|
+
</p>
|
|
55
|
+
|
|
56
|
+
<p align="center">
|
|
57
|
+
<a href="LICENSE"><img src="https://img.shields.io/badge/License-MIT-blue.svg" alt="License: MIT"/></a>
|
|
58
|
+
<a href="https://www.python.org/downloads/"><img src="https://img.shields.io/badge/python-3.11%2B-blue.svg" alt="Python 3.11+"/></a>
|
|
59
|
+
<a href="https://modelcontextprotocol.io"><img src="https://img.shields.io/badge/MCP-Server-green.svg" alt="MCP Server"/></a>
|
|
60
|
+
<img src="https://img.shields.io/badge/tests-137%20passed-brightgreen.svg" alt="Tests"/>
|
|
61
|
+
</p>
|
|
62
|
+
|
|
63
|
+
<p align="center">
|
|
64
|
+
<b>Security-first code intelligence for AI agents.</b><br/>
|
|
65
|
+
Parses <b>10 languages</b>, builds a knowledge graph with <b>taint analysis</b>, <b>CVE detection</b>, and <b>clustering</b>.<br/>
|
|
66
|
+
Connect to any AI agent via <b>MCP</b> — Cursor, Claude Code, Windsurf, and more.
|
|
67
|
+
</p>
|
|
68
|
+
|
|
69
|
+
---
|
|
70
|
+
|
|
71
|
+
## Why ArchGraph?
|
|
72
|
+
|
|
73
|
+
Other tools help you *understand* code. **ArchGraph helps you *secure* it.**
|
|
74
|
+
|
|
75
|
+
| | **ArchGraph** | **Code Search** | **AST Parsers** | **SAST Tools** |
|
|
76
|
+
|--|---------------|-----------------|-----------------|----------------|
|
|
77
|
+
| **Taint Analysis** | ✅ Input → Sink | ❌ | ❌ | ✅ |
|
|
78
|
+
| **CVE Detection** | ✅ Auto via OSV | ❌ | ❌ | Partial |
|
|
79
|
+
| **CFG / Data Flow** | ✅ libclang + tree-sitter | ❌ | Partial | ✅ |
|
|
80
|
+
| **MCP for AI Agents** | ✅ 7 tools | ❌ | ❌ | ❌ |
|
|
81
|
+
| **Functional Clustering** | ✅ Community detection | ❌ | ❌ | ❌ |
|
|
82
|
+
| **Execution Tracing** | ✅ Entry → Sink flows | ❌ | ❌ | ❌ |
|
|
83
|
+
| **Export (JSON/GraphML)** | ✅ | ❌ | ❌ | Partial |
|
|
84
|
+
| **Local-first** | ✅ Neo4j | Varies | ✅ | Varies |
|
|
85
|
+
| **License** | MIT | Varies | Varies | Often proprietary |
|
|
86
|
+
|
|
87
|
+
---
|
|
88
|
+
|
|
89
|
+
## Quick Start
|
|
90
|
+
|
|
91
|
+
```bash
|
|
92
|
+
# Install
|
|
93
|
+
pip install archgraph
|
|
94
|
+
|
|
95
|
+
# Extract (auto-detects languages)
|
|
96
|
+
archgraph extract /path/to/repo -w 4
|
|
97
|
+
|
|
98
|
+
# Query the graph
|
|
99
|
+
archgraph query "MATCH (f:Function {is_input_source: true}) RETURN f.name, f.file"
|
|
100
|
+
|
|
101
|
+
# Start web dashboard
|
|
102
|
+
archgraph serve --port 8080
|
|
103
|
+
|
|
104
|
+
# Generate HTML security report
|
|
105
|
+
archgraph report /path/to/repo
|
|
106
|
+
```
|
|
107
|
+
|
|
108
|
+
**With Docker (Neo4j included):**
|
|
109
|
+
```bash
|
|
110
|
+
docker compose up -d neo4j # password: archgraph
|
|
111
|
+
archgraph extract /path/to/repo --neo4j-password archgraph
|
|
112
|
+
```
|
|
113
|
+
|
|
114
|
+
---
|
|
115
|
+
|
|
116
|
+
## 🤖 AI Agent Integration (MCP)
|
|
117
|
+
|
|
118
|
+
ArchGraph exposes 7 tools and 4 resources to any MCP-compatible agent.
|
|
119
|
+
|
|
120
|
+
### Setup
|
|
121
|
+
|
|
122
|
+
```bash
|
|
123
|
+
# Index your repo
|
|
124
|
+
archgraph extract . --include-cve --include-clustering
|
|
125
|
+
|
|
126
|
+
# Start MCP server
|
|
127
|
+
archgraph mcp
|
|
128
|
+
```
|
|
129
|
+
|
|
130
|
+
**Connect your agent:**
|
|
131
|
+
|
|
132
|
+
| Agent | Command |
|
|
133
|
+
|-------|---------|
|
|
134
|
+
| Claude Code | `claude mcp add archgraph -- archgraph mcp` |
|
|
135
|
+
| Cursor | Add to `~/.cursor/mcp.json` |
|
|
136
|
+
| Windsurf | Add to MCP config |
|
|
137
|
+
| OpenCode | Add to `~/.config/opencode/config.json` |
|
|
138
|
+
|
|
139
|
+
### What Your Agent Gets
|
|
140
|
+
|
|
141
|
+
**Tools:** `query`, `impact`, `context`, `detect_changes`, `find_vulnerabilities`, `cypher`, `stats`
|
|
142
|
+
|
|
143
|
+
**Resources:** `archgraph://schema`, `archgraph://security`, `archgraph://clusters`, `archgraph://processes`
|
|
144
|
+
|
|
145
|
+
### Example Conversation
|
|
146
|
+
|
|
147
|
+
```
|
|
148
|
+
You: "Are there any buffer overflow risks in the network code?"
|
|
149
|
+
|
|
150
|
+
Agent:
|
|
151
|
+
1. Queries input sources in network files
|
|
152
|
+
2. Traces taint paths to dangerous sinks
|
|
153
|
+
3. Reports: "Found 2 paths:
|
|
154
|
+
- net_recv() → memcpy() in src/net/handler.c (depth: 3)
|
|
155
|
+
- read_packet() → strcpy() in src/net/parser.c (depth: 4)
|
|
156
|
+
Both reach dangerous sinks without validation."
|
|
157
|
+
```
|
|
158
|
+
|
|
159
|
+
---
|
|
160
|
+
|
|
161
|
+
## 🔒 Security Features
|
|
162
|
+
|
|
163
|
+
**Automatic labeling** — Every function gets security labels:
|
|
164
|
+
- `is_input_source` — reads external data (recv, read, fetch, ...)
|
|
165
|
+
- `is_dangerous_sink` — dangerous operations (memcpy, exec, eval, ...)
|
|
166
|
+
- `is_allocator`, `is_crypto`, `is_parser` — additional categories
|
|
167
|
+
- `risk_score` — 0-100 risk score based on labels
|
|
168
|
+
|
|
169
|
+
**Taint path detection:**
|
|
170
|
+
```cypher
|
|
171
|
+
MATCH path = (src:Function {is_input_source: true})-[:CALLS*1..8]->(sink:Function {is_dangerous_sink: true})
|
|
172
|
+
RETURN src.name, sink.name, length(path) AS depth
|
|
173
|
+
```
|
|
174
|
+
|
|
175
|
+
**CVE enrichment:**
|
|
176
|
+
```bash
|
|
177
|
+
archgraph extract . --include-cve # Queries OSV API automatically
|
|
178
|
+
```
|
|
179
|
+
|
|
180
|
+
---
|
|
181
|
+
|
|
182
|
+
## All Commands
|
|
183
|
+
|
|
184
|
+
| Command | Description |
|
|
185
|
+
|---------|-------------|
|
|
186
|
+
| `extract` | Extract code graph from repository |
|
|
187
|
+
| `query` | Run Cypher queries against the graph |
|
|
188
|
+
| `stats` | Show node/edge statistics |
|
|
189
|
+
| `schema` | Show graph schema |
|
|
190
|
+
| `diff` | Compare repo state vs stored graph |
|
|
191
|
+
| `impact` | Blast radius analysis for a function |
|
|
192
|
+
| `export` | Export to JSON, GraphML, or CSV |
|
|
193
|
+
| `report` | Generate HTML security report |
|
|
194
|
+
| `serve` | Start web dashboard |
|
|
195
|
+
| `mcp` | Start MCP server for AI agents |
|
|
196
|
+
| `skills` | Generate AI agent skill files |
|
|
197
|
+
| `repos` | List indexed repositories |
|
|
198
|
+
|
|
199
|
+
---
|
|
200
|
+
|
|
201
|
+
## Use Cases
|
|
202
|
+
|
|
203
|
+
### Security Audit
|
|
204
|
+
```bash
|
|
205
|
+
archgraph extract /target -l c,cpp --include-cve --include-clang
|
|
206
|
+
archgraph query "MATCH path = (src:Function {is_input_source: true})-[:CALLS*1..5]->(sink:Function {is_dangerous_sink: true}) RETURN src.name, sink.name"
|
|
207
|
+
```
|
|
208
|
+
|
|
209
|
+
### Code Review
|
|
210
|
+
```bash
|
|
211
|
+
archgraph diff /path/to/repo
|
|
212
|
+
archgraph impact "func:src/api.c:handle:42" --direction both
|
|
213
|
+
```
|
|
214
|
+
|
|
215
|
+
### Reverse Engineering
|
|
216
|
+
```bash
|
|
217
|
+
archgraph extract /binary/project -l c,cpp,rust --include-clang --include-deep
|
|
218
|
+
archgraph query "MATCH (f:Function) WHERE f.is_exported = true RETURN f.name, f.file"
|
|
219
|
+
```
|
|
220
|
+
|
|
221
|
+
---
|
|
222
|
+
|
|
223
|
+
## Architecture
|
|
224
|
+
|
|
225
|
+
```
|
|
226
|
+
┌──────────────────────────────────────────────────┐
|
|
227
|
+
│ GraphBuilder Pipeline (11 steps) │
|
|
228
|
+
│ │
|
|
229
|
+
Local Path ─────┤ 1. Tree-sitter structural extraction │
|
|
230
|
+
or │ 2. Git history │
|
|
231
|
+
GitHub URL ─────┤ 3. Dependency extraction │──── Neo4j
|
|
232
|
+
(auto clone) │ 4. Annotation scanning │ Store
|
|
233
|
+
│ 5. Security labeling │ │
|
|
234
|
+
│ 6. Clang deep analysis (C/C++) │ ├── MCP Server
|
|
235
|
+
│ 7. Tree-sitter deep analysis (Rust/Java/Go/…) │ ├── Web Dashboard
|
|
236
|
+
│ 8. Churn enrichment │ └── Export/Report
|
|
237
|
+
│ 9. CVE enrichment (OSV API) │
|
|
238
|
+
│ 10. Clustering (community detection) │
|
|
239
|
+
│ 11. Process tracing (execution flows) │
|
|
240
|
+
└──────────────────────────────────────────────────┘
|
|
241
|
+
```
|
|
242
|
+
|
|
243
|
+
---
|
|
244
|
+
|
|
245
|
+
## Benchmarks
|
|
246
|
+
|
|
247
|
+
| Project | Language | Files | Nodes | Edges | Time |
|
|
248
|
+
|---------|----------|-------|-------|-------|------|
|
|
249
|
+
| [zlib](https://github.com/madler/zlib) (~50K LOC) | C | 79 | 2,389 | 3,968 | 6.6s |
|
|
250
|
+
| [fastify](https://github.com/fastify/fastify) (~30K LOC) | JavaScript | 487 | 2,810 | 18,472 | 10.5s |
|
|
251
|
+
| Linux `drivers/usb` (~500K LOC) | C | 892 | 62,812 | 122,746 | 12.7s |
|
|
252
|
+
|
|
253
|
+
*Benchmarks: Windows 11, Python 3.13, single-threaded. Parallel mode (`-w 4`) is 2-3x faster.*
|
|
254
|
+
|
|
255
|
+
---
|
|
256
|
+
|
|
257
|
+
## Documentation
|
|
258
|
+
|
|
259
|
+
| Document | Description |
|
|
260
|
+
|----------|-------------|
|
|
261
|
+
| [Architecture & Schema](docs/ARCHITECTURE.md) | Graph schema, node/edge types, pipeline |
|
|
262
|
+
| [CLI Reference](docs/CLI.md) | All commands and options |
|
|
263
|
+
| [AI Agent Integration](docs/AGENT.md) | MCP setup, tools, examples |
|
|
264
|
+
| [Security Analysis](docs/SECURITY.md) | Security labeling, Cypher queries |
|
|
265
|
+
| [Deep Analysis](docs/DEEP_ANALYSIS.md) | CFG, data flow, taint tracking |
|
|
266
|
+
| [Roadmap](docs/ROADMAP.md) | Development phases |
|
|
267
|
+
|
|
268
|
+
---
|
|
269
|
+
|
|
270
|
+
## Testing
|
|
271
|
+
|
|
272
|
+
```bash
|
|
273
|
+
pytest tests/ -v # 137 passed, 22 skipped
|
|
274
|
+
```
|
|
275
|
+
|
|
276
|
+
No external services required. Tests use temporary directories with real tree-sitter parsing and git operations.
|
|
277
|
+
|
|
278
|
+
---
|
|
279
|
+
|
|
280
|
+
## License
|
|
281
|
+
|
|
282
|
+
[MIT](LICENSE)
|