archgraph 0.2.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (73)
  1. archgraph-0.2.0/.claude/settings.local.json +16 -0
  2. archgraph-0.2.0/.dockerignore +12 -0
  3. archgraph-0.2.0/.github/REPO_SETTINGS.md +13 -0
  4. archgraph-0.2.0/.gitignore +48 -0
  5. archgraph-0.2.0/CLAUDE.md +66 -0
  6. archgraph-0.2.0/Dockerfile +29 -0
  7. archgraph-0.2.0/LICENSE +21 -0
  8. archgraph-0.2.0/PKG-INFO +282 -0
  9. archgraph-0.2.0/README.md +231 -0
  10. archgraph-0.2.0/archgraph/__init__.py +3 -0
  11. archgraph-0.2.0/archgraph/cli.py +740 -0
  12. archgraph-0.2.0/archgraph/config.py +202 -0
  13. archgraph-0.2.0/archgraph/enrichment/__init__.py +8 -0
  14. archgraph-0.2.0/archgraph/enrichment/churn.py +66 -0
  15. archgraph-0.2.0/archgraph/enrichment/clustering.py +115 -0
  16. archgraph-0.2.0/archgraph/enrichment/cve.py +127 -0
  17. archgraph-0.2.0/archgraph/enrichment/process.py +196 -0
  18. archgraph-0.2.0/archgraph/export.py +95 -0
  19. archgraph-0.2.0/archgraph/extractors/__init__.py +17 -0
  20. archgraph-0.2.0/archgraph/extractors/annotations.py +69 -0
  21. archgraph-0.2.0/archgraph/extractors/base.py +25 -0
  22. archgraph-0.2.0/archgraph/extractors/clang.py +975 -0
  23. archgraph-0.2.0/archgraph/extractors/deep/__init__.py +202 -0
  24. archgraph-0.2.0/archgraph/extractors/deep/engine.py +939 -0
  25. archgraph-0.2.0/archgraph/extractors/deep/go.py +71 -0
  26. archgraph-0.2.0/archgraph/extractors/deep/java.py +65 -0
  27. archgraph-0.2.0/archgraph/extractors/deep/kotlin.py +61 -0
  28. archgraph-0.2.0/archgraph/extractors/deep/lang_spec.py +52 -0
  29. archgraph-0.2.0/archgraph/extractors/deep/rust.py +80 -0
  30. archgraph-0.2.0/archgraph/extractors/deep/swift.py +74 -0
  31. archgraph-0.2.0/archgraph/extractors/dependencies.py +230 -0
  32. archgraph-0.2.0/archgraph/extractors/git.py +216 -0
  33. archgraph-0.2.0/archgraph/extractors/security_labels.py +108 -0
  34. archgraph-0.2.0/archgraph/extractors/treesitter.py +921 -0
  35. archgraph-0.2.0/archgraph/graph/__init__.py +17 -0
  36. archgraph-0.2.0/archgraph/graph/builder.py +572 -0
  37. archgraph-0.2.0/archgraph/graph/neo4j_store.py +415 -0
  38. archgraph-0.2.0/archgraph/graph/schema.py +264 -0
  39. archgraph-0.2.0/archgraph/manifest.py +260 -0
  40. archgraph-0.2.0/archgraph/mcp/__init__.py +5 -0
  41. archgraph-0.2.0/archgraph/mcp/server.py +454 -0
  42. archgraph-0.2.0/archgraph/registry.py +137 -0
  43. archgraph-0.2.0/archgraph/report.py +205 -0
  44. archgraph-0.2.0/archgraph/search.py +259 -0
  45. archgraph-0.2.0/archgraph/server/__init__.py +5 -0
  46. archgraph-0.2.0/archgraph/server/web.py +410 -0
  47. archgraph-0.2.0/archgraph/skills.py +320 -0
  48. archgraph-0.2.0/archgraph/tool/__init__.py +1 -0
  49. archgraph-0.2.0/archgraph/tool/archgraph_tool.py +225 -0
  50. archgraph-0.2.0/archgraph/tool/impact.py +202 -0
  51. archgraph-0.2.0/assets/banner.svg +103 -0
  52. archgraph-0.2.0/assets/logo.svg +72 -0
  53. archgraph-0.2.0/docker-compose.yml +29 -0
  54. archgraph-0.2.0/docs/AGENT.md +302 -0
  55. archgraph-0.2.0/docs/ARCHITECTURE.md +258 -0
  56. archgraph-0.2.0/docs/CLI.md +343 -0
  57. archgraph-0.2.0/docs/DEEP_ANALYSIS.md +112 -0
  58. archgraph-0.2.0/docs/ROADMAP.md +81 -0
  59. archgraph-0.2.0/docs/SECURITY.md +162 -0
  60. archgraph-0.2.0/pyproject.toml +66 -0
  61. archgraph-0.2.0/tests/__init__.py +0 -0
  62. archgraph-0.2.0/tests/test_builder.py +419 -0
  63. archgraph-0.2.0/tests/test_clang.py +367 -0
  64. archgraph-0.2.0/tests/test_cve.py +167 -0
  65. archgraph-0.2.0/tests/test_deep.py +661 -0
  66. archgraph-0.2.0/tests/test_extractors.py +341 -0
  67. archgraph-0.2.0/tests/test_gitnexus_features.py +255 -0
  68. archgraph-0.2.0/tests/test_manifest.py +232 -0
  69. archgraph-0.2.0/tests/test_neo4j_mock.py +53 -0
  70. archgraph-0.2.0/tests/test_new_features.py +198 -0
  71. archgraph-0.2.0/tests/test_schema.py +184 -0
  72. archgraph-0.2.0/tests/test_tool.py +17 -0
  73. archgraph-0.2.0/tests/test_treesitter.py +253 -0
@@ -0,0 +1,16 @@
1
+ {
2
+ "permissions": {
3
+ "allow": [
4
+ "Bash(source:*)",
5
+ "Bash(python -m pytest:*)",
6
+ "Bash(archgraph --help:*)",
7
+ "WebSearch",
8
+ "WebFetch(domain:libclang.readthedocs.io)",
9
+ "WebFetch(domain:eli.thegreenplace.net)",
10
+ "Bash(wc -l:*)",
11
+ "Bash(pytest:*)",
12
+ "Bash(git add:*)",
13
+ "Bash(git commit:*)"
14
+ ]
15
+ }
16
+ }
@@ -0,0 +1,12 @@
1
+ .git
2
+ .venv
3
+ __pycache__
4
+ *.pyc
5
+ .pytest_cache
6
+ .ruff_cache
7
+ assets/
8
+ docs/
9
+ tests/
10
+ *.md
11
+ !README.md
12
+ LICENSE
@@ -0,0 +1,13 @@
1
+ # GitHub Repository Settings
2
+
3
+ Apply these settings manually via the GitHub web UI (Settings > General):
4
+
5
+ ## About
6
+
7
+ **Description:**
8
+ Source code knowledge graph extraction tool. Parses 10 languages via tree-sitter, builds Neo4j graph with CFG, data flow, taint analysis, and CVE enrichment.
9
+
10
+ **Website:** *(leave empty or add docs link)*
11
+
12
+ **Topics:**
13
+ `reverse-engineering` `static-analysis` `neo4j` `tree-sitter` `code-analysis` `security` `taint-analysis` `control-flow-graph` `cypher` `python`
@@ -0,0 +1,48 @@
1
+ # Python
2
+ __pycache__/
3
+ *.py[cod]
4
+ *$py.class
5
+ *.egg-info/
6
+ *.egg
7
+ dist/
8
+ build/
9
+ .eggs/
10
+
11
+ # Virtual environments
12
+ .venv/
13
+ venv/
14
+ ENV/
15
+
16
+ # IDE
17
+ .idea/
18
+ .vscode/
19
+ *.swp
20
+ *.swo
21
+ *~
22
+
23
+ # Testing
24
+ .pytest_cache/
25
+ .coverage
26
+ htmlcov/
27
+
28
+ # OS
29
+ .DS_Store
30
+ Thumbs.db
31
+
32
+ # Ruff
33
+ .ruff_cache/
34
+
35
+ # Neo4j data
36
+ neo4j_data/
37
+
38
+ # Tree-sitter compiled grammars
39
+ grammars/
40
+
41
+ # rlm internal docs (not part of archgraph)
42
+ rlm-perspectives.md
43
+ rlm_insights.md
44
+ tools.md
45
+
46
+ # Accidental pip output
47
+ =18.1.0
48
+ .bg-shell/
@@ -0,0 +1,66 @@
1
+ # ArchGraph
2
+
3
+ Kaynak kod graph extraction & Cypher query tool for reverse engineering.
4
+
5
+ ## Geliştirme Ortamı
6
+
7
+ ```bash
8
+ python3 -m venv .venv
9
+ source .venv/bin/activate
10
+ pip install -e ".[dev]"
11
+ ```
12
+
13
+ ## Testler
14
+
15
+ ```bash
16
+ # Tüm testler (102 test — 98 passed, 4 skipped)
17
+ pytest tests/ -v
18
+ ```
19
+
20
+ Test dosyaları Neo4j gerektirmez. Tree-sitter ve git testleri `tmp_path` fixture ile gerçek parse/commit yapar.
21
+
22
+ ## Kod Kuralları
23
+
24
+ - Python 3.11+, type hint zorunlu
25
+ - Ruff formatter, line-length 100
26
+ - Her extractor `BaseExtractor`'dan türer, `extract(repo_path, **kwargs) -> GraphData` döner
27
+ - Extractor'lara `workers` kwarg ile parallelism geçirilir (thread-local parser)
28
+ - Node ID formatı: `{tip}:{yol}:{isim}:{satır}` (ör. `func:src/main.c:parse_data:42`)
29
+ - Edge'ler `(source_id, target_id, type)` tuple ile unique
30
+ - `GraphData.deduplicate()` pipeline sonunda çağrılır
31
+ - `GraphData.merge()` tek thread'de çağrılmalı (futures.result() sonrası)
32
+ - Security label'lar `config.py`'deki frozenset'lerle eşleşir
33
+ - Neo4j importta `_Node` label'ı tüm node'lara eklenir (cross-label query için)
34
+ - Neo4j'de `_id` property unique constraint taşır
35
+
36
+ ## Thread Safety
37
+
38
+ - `ts.Language` objeleri thread-safe → paylaşılabilir
39
+ - `ts.Parser` objeleri thread-safe DEĞİL → `threading.local()` ile thread-başına instance
40
+ - libclang `Index` thread-safe değil → her thread kendi `Index.create()`
41
+ - Pipeline merge işlemleri ana thread'de yapılır
42
+
43
+ ## Kilit Dosyalar
44
+
45
+ | Dosya | Açıklama |
46
+ |-------|----------|
47
+ | `archgraph/config.py` | Tüm sabitler, güvenlik pattern'leri, `ExtractConfig` dataclass |
48
+ | `archgraph/graph/builder.py` | 11-adım pipeline orkestrasyon (parallel/sequential) |
49
+ | `archgraph/graph/schema.py` | `Node`/`Edge` dataclass, `NodeLabel`/`EdgeType` sabitleri |
50
+ | `archgraph/graph/neo4j_store.py` | Neo4j batch import, `_Node` label, `_id` unique |
51
+ | `archgraph/extractors/treesitter.py` | Ana extractor, 10 dil, thread-local parser |
52
+ | `archgraph/extractors/clang.py` | libclang deep analysis (CFG, data flow, taint) |
53
+ | `archgraph/extractors/deep/` | Tree-sitter deep analysis (Rust, Java, Go, Kotlin, Swift) |
54
+ | `archgraph/enrichment/cve.py` | CVE enrichment — OSV API batch query |
55
+ | `archgraph/tool/archgraph_tool.py` | rlm-agent tool (standalone, BaseTool bağımlılığı yok) |
56
+
57
+ ## Dokümantasyon
58
+
59
+ Detaylı dokümantasyon `docs/` altında:
60
+
61
+ - `docs/ARCHITECTURE.md` — Proje yapısı, pipeline, graph schema, node/edge tipleri
62
+ - `docs/CLI.md` — Tüm komutlar ve option'lar
63
+ - `docs/DEEP_ANALYSIS.md` — CFG, data flow, taint, dil-bazlı pattern'ler
64
+ - `docs/SECURITY.md` — Security labeling, CVE enrichment, örnek sorgular
65
+ - `docs/AGENT.md` — rlm-agent entegrasyonu, API referansı
66
+ - `docs/ROADMAP.md` — Faz 1-4 durumu
@@ -0,0 +1,29 @@
1
+ # --- Stage 1: Builder ---
2
+ FROM python:3.11-slim AS builder
3
+
4
+ RUN apt-get update && apt-get install -y --no-install-recommends \
5
+ gcc g++ git \
6
+ && rm -rf /var/lib/apt/lists/*
7
+
8
+ WORKDIR /build
9
+ COPY . .
10
+
11
+ ARG INSTALL_EXTRAS=""
12
+ RUN if [ -z "$INSTALL_EXTRAS" ]; then \
13
+ pip install --no-cache-dir .; \
14
+ else \
15
+ pip install --no-cache-dir ".[$INSTALL_EXTRAS]"; \
16
+ fi
17
+
18
+ # --- Stage 2: Runtime ---
19
+ FROM python:3.11-slim
20
+
21
+ RUN apt-get update && apt-get install -y --no-install-recommends \
22
+ git \
23
+ && rm -rf /var/lib/apt/lists/*
24
+
25
+ COPY --from=builder /usr/local/lib/python3.11/site-packages /usr/local/lib/python3.11/site-packages
26
+ COPY --from=builder /usr/local/bin/archgraph /usr/local/bin/archgraph
27
+
28
+ WORKDIR /data
29
+ ENTRYPOINT ["archgraph"]
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2024-2026 ArchGraph Contributors
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1,282 @@
1
+ Metadata-Version: 2.4
2
+ Name: archgraph
3
+ Version: 0.2.0
4
+ Summary: Security-first code intelligence for AI agents — taint analysis, CVE detection, MCP integration
5
+ Author: ArchGraph Contributors
6
+ License-Expression: MIT
7
+ License-File: LICENSE
8
+ Keywords: ai-agents,code-analysis,knowledge-graph,mcp,neo4j,security,static-analysis,taint-analysis
9
+ Classifier: Development Status :: 4 - Beta
10
+ Classifier: Intended Audience :: Developers
11
+ Classifier: Topic :: Security
12
+ Classifier: Topic :: Software Development :: Quality Assurance
13
+ Requires-Python: >=3.11
14
+ Requires-Dist: click>=8.1.0
15
+ Requires-Dist: fastapi>=0.110.0
16
+ Requires-Dist: jinja2>=3.1.0
17
+ Requires-Dist: mcp>=1.0.0
18
+ Requires-Dist: neo4j>=5.0.0
19
+ Requires-Dist: networkx>=3.0
20
+ Requires-Dist: pyyaml>=6.0
21
+ Requires-Dist: rich>=13.0.0
22
+ Requires-Dist: scipy>=1.11.0
23
+ Requires-Dist: toml>=0.10.0
24
+ Requires-Dist: tree-sitter-c>=0.23.0
25
+ Requires-Dist: tree-sitter-cpp>=0.23.0
26
+ Requires-Dist: tree-sitter-go>=0.23.0
27
+ Requires-Dist: tree-sitter-java>=0.23.0
28
+ Requires-Dist: tree-sitter-javascript>=0.23.0
29
+ Requires-Dist: tree-sitter-rust>=0.23.0
30
+ Requires-Dist: tree-sitter-typescript>=0.23.0
31
+ Requires-Dist: tree-sitter>=0.24.0
32
+ Requires-Dist: uvicorn>=0.29.0
33
+ Provides-Extra: all
34
+ Requires-Dist: libclang>=18.1.0; extra == 'all'
35
+ Requires-Dist: tree-sitter-kotlin>=0.23.0; extra == 'all'
36
+ Requires-Dist: tree-sitter-objc>=3.0.0; extra == 'all'
37
+ Requires-Dist: tree-sitter-swift>=0.6.0; extra == 'all'
38
+ Provides-Extra: clang
39
+ Requires-Dist: libclang>=18.1.0; extra == 'clang'
40
+ Provides-Extra: dev
41
+ Requires-Dist: pytest-asyncio>=0.23.0; extra == 'dev'
42
+ Requires-Dist: pytest>=8.0.0; extra == 'dev'
43
+ Requires-Dist: ruff>=0.3.0; extra == 'dev'
44
+ Provides-Extra: kotlin
45
+ Requires-Dist: tree-sitter-kotlin>=0.23.0; extra == 'kotlin'
46
+ Provides-Extra: objc
47
+ Requires-Dist: tree-sitter-objc>=3.0.0; extra == 'objc'
48
+ Provides-Extra: swift
49
+ Requires-Dist: tree-sitter-swift>=0.6.0; extra == 'swift'
50
+ Description-Content-Type: text/markdown
51
+
52
+ <p align="center">
53
+ <img src="assets/banner.svg" alt="ArchGraph" width="700"/>
54
+ </p>
55
+
56
+ <p align="center">
57
+ <a href="LICENSE"><img src="https://img.shields.io/badge/License-MIT-blue.svg" alt="License: MIT"/></a>
58
+ <a href="https://www.python.org/downloads/"><img src="https://img.shields.io/badge/python-3.11%2B-blue.svg" alt="Python 3.11+"/></a>
59
+ <a href="https://modelcontextprotocol.io"><img src="https://img.shields.io/badge/MCP-Server-green.svg" alt="MCP Server"/></a>
60
+ <img src="https://img.shields.io/badge/tests-137%20passed-brightgreen.svg" alt="Tests"/>
61
+ </p>
62
+
63
+ <p align="center">
64
+ <b>Security-first code intelligence for AI agents.</b><br/>
65
+ Parses <b>10 languages</b>, builds a knowledge graph with <b>taint analysis</b>, <b>CVE detection</b>, and <b>clustering</b>.<br/>
66
+ Connect to any AI agent via <b>MCP</b> — Cursor, Claude Code, Windsurf, and more.
67
+ </p>
68
+
69
+ ---
70
+
71
+ ## Why ArchGraph?
72
+
73
+ Other tools help you *understand* code. **ArchGraph helps you *secure* it.**
74
+
75
+ | | **ArchGraph** | **Code Search** | **AST Parsers** | **SAST Tools** |
76
+ |--|---------------|-----------------|-----------------|----------------|
77
+ | **Taint Analysis** | ✅ Input → Sink | ❌ | ❌ | ✅ |
78
+ | **CVE Detection** | ✅ Auto via OSV | ❌ | ❌ | Partial |
79
+ | **CFG / Data Flow** | ✅ libclang + tree-sitter | ❌ | Partial | ✅ |
80
+ | **MCP for AI Agents** | ✅ 7 tools | ❌ | ❌ | ❌ |
81
+ | **Functional Clustering** | ✅ Community detection | ❌ | ❌ | ❌ |
82
+ | **Execution Tracing** | ✅ Entry → Sink flows | ❌ | ❌ | ❌ |
83
+ | **Export (JSON/GraphML)** | ✅ | ❌ | ❌ | Partial |
84
+ | **Local-first** | ✅ Neo4j | Varies | ✅ | Varies |
85
+ | **License** | MIT | Varies | Varies | Often proprietary |
86
+
87
+ ---
88
+
89
+ ## Quick Start
90
+
91
+ ```bash
92
+ # Install
93
+ pip install archgraph
94
+
95
+ # Extract (auto-detects languages)
96
+ archgraph extract /path/to/repo -w 4
97
+
98
+ # Query the graph
99
+ archgraph query "MATCH (f:Function {is_input_source: true}) RETURN f.name, f.file"
100
+
101
+ # Start web dashboard
102
+ archgraph serve --port 8080
103
+
104
+ # Generate HTML security report
105
+ archgraph report /path/to/repo
106
+ ```
107
+
108
+ **With Docker (Neo4j included):**
109
+ ```bash
110
+ docker compose up -d neo4j # password: archgraph
111
+ archgraph extract /path/to/repo --neo4j-password archgraph
112
+ ```
113
+
114
+ ---
115
+
116
+ ## 🤖 AI Agent Integration (MCP)
117
+
118
+ ArchGraph exposes 7 tools and 4 resources to any MCP-compatible agent.
119
+
120
+ ### Setup
121
+
122
+ ```bash
123
+ # Index your repo
124
+ archgraph extract . --include-cve --include-clustering
125
+
126
+ # Start MCP server
127
+ archgraph mcp
128
+ ```
129
+
130
+ **Connect your agent:**
131
+
132
+ | Agent | Command |
133
+ |-------|---------|
134
+ | Claude Code | `claude mcp add archgraph -- archgraph mcp` |
135
+ | Cursor | Add to `~/.cursor/mcp.json` |
136
+ | Windsurf | Add to MCP config |
137
+ | OpenCode | Add to `~/.config/opencode/config.json` |
138
+
139
+ ### What Your Agent Gets
140
+
141
+ **Tools:** `query`, `impact`, `context`, `detect_changes`, `find_vulnerabilities`, `cypher`, `stats`
142
+
143
+ **Resources:** `archgraph://schema`, `archgraph://security`, `archgraph://clusters`, `archgraph://processes`
144
+
145
+ ### Example Conversation
146
+
147
+ ```
148
+ You: "Are there any buffer overflow risks in the network code?"
149
+
150
+ Agent:
151
+ 1. Queries input sources in network files
152
+ 2. Traces taint paths to dangerous sinks
153
+ 3. Reports: "Found 2 paths:
154
+ - net_recv() → memcpy() in src/net/handler.c (depth: 3)
155
+ - read_packet() → strcpy() in src/net/parser.c (depth: 4)
156
+ Both reach dangerous sinks without validation."
157
+ ```
158
+
159
+ ---
160
+
161
+ ## 🔒 Security Features
162
+
163
+ **Automatic labeling** — Every function gets security labels:
164
+ - `is_input_source` — reads external data (recv, read, fetch, ...)
165
+ - `is_dangerous_sink` — dangerous operations (memcpy, exec, eval, ...)
166
+ - `is_allocator`, `is_crypto`, `is_parser` — additional categories
167
+ - `risk_score` — 0-100 risk score based on labels
168
+
169
+ **Taint path detection:**
170
+ ```cypher
171
+ MATCH path = (src:Function {is_input_source: true})-[:CALLS*1..8]->(sink:Function {is_dangerous_sink: true})
172
+ RETURN src.name, sink.name, length(path) AS depth
173
+ ```
174
+
175
+ **CVE enrichment:**
176
+ ```bash
177
+ archgraph extract . --include-cve # Queries OSV API automatically
178
+ ```
179
+
180
+ ---
181
+
182
+ ## All Commands
183
+
184
+ | Command | Description |
185
+ |---------|-------------|
186
+ | `extract` | Extract code graph from repository |
187
+ | `query` | Run Cypher queries against the graph |
188
+ | `stats` | Show node/edge statistics |
189
+ | `schema` | Show graph schema |
190
+ | `diff` | Compare repo state vs stored graph |
191
+ | `impact` | Blast radius analysis for a function |
192
+ | `export` | Export to JSON, GraphML, or CSV |
193
+ | `report` | Generate HTML security report |
194
+ | `serve` | Start web dashboard |
195
+ | `mcp` | Start MCP server for AI agents |
196
+ | `skills` | Generate AI agent skill files |
197
+ | `repos` | List indexed repositories |
198
+
199
+ ---
200
+
201
+ ## Use Cases
202
+
203
+ ### Security Audit
204
+ ```bash
205
+ archgraph extract /target -l c,cpp --include-cve --include-clang
206
+ archgraph query "MATCH path = (src:Function {is_input_source: true})-[:CALLS*1..5]->(sink:Function {is_dangerous_sink: true}) RETURN src.name, sink.name"
207
+ ```
208
+
209
+ ### Code Review
210
+ ```bash
211
+ archgraph diff /path/to/repo
212
+ archgraph impact "func:src/api.c:handle:42" --direction both
213
+ ```
214
+
215
+ ### Reverse Engineering
216
+ ```bash
217
+ archgraph extract /binary/project -l c,cpp,rust --include-clang --include-deep
218
+ archgraph query "MATCH (f:Function) WHERE f.is_exported = true RETURN f.name, f.file"
219
+ ```
220
+
221
+ ---
222
+
223
+ ## Architecture
224
+
225
+ ```
226
+ ┌──────────────────────────────────────────────────┐
227
+ │ GraphBuilder Pipeline (11 steps) │
228
+ │ │
229
+ Local Path ─────┤ 1. Tree-sitter structural extraction │
230
+ or │ 2. Git history │
231
+ GitHub URL ─────┤ 3. Dependency extraction │──── Neo4j
232
+ (auto clone) │ 4. Annotation scanning │ Store
233
+ │ 5. Security labeling │ │
234
+ │ 6. Clang deep analysis (C/C++) │ ├── MCP Server
235
+ │ 7. Tree-sitter deep analysis (Rust/Java/Go/…) │ ├── Web Dashboard
236
+ │ 8. Churn enrichment │ └── Export/Report
237
+ │ 9. CVE enrichment (OSV API) │
238
+ │ 10. Clustering (community detection) │
239
+ │ 11. Process tracing (execution flows) │
240
+ └──────────────────────────────────────────────────┘
241
+ ```
242
+
243
+ ---
244
+
245
+ ## Benchmarks
246
+
247
+ | Project | Language | Files | Nodes | Edges | Time |
248
+ |---------|----------|-------|-------|-------|------|
249
+ | [zlib](https://github.com/madler/zlib) (~50K LOC) | C | 79 | 2,389 | 3,968 | 6.6s |
250
+ | [fastify](https://github.com/fastify/fastify) (~30K LOC) | JavaScript | 487 | 2,810 | 18,472 | 10.5s |
251
+ | Linux `drivers/usb` (~500K LOC) | C | 892 | 62,812 | 122,746 | 12.7s |
252
+
253
+ *Benchmarks: Windows 11, Python 3.13, single-threaded. Parallel mode (`-w 4`) is 2-3x faster.*
254
+
255
+ ---
256
+
257
+ ## Documentation
258
+
259
+ | Document | Description |
260
+ |----------|-------------|
261
+ | [Architecture & Schema](docs/ARCHITECTURE.md) | Graph schema, node/edge types, pipeline |
262
+ | [CLI Reference](docs/CLI.md) | All commands and options |
263
+ | [AI Agent Integration](docs/AGENT.md) | MCP setup, tools, examples |
264
+ | [Security Analysis](docs/SECURITY.md) | Security labeling, Cypher queries |
265
+ | [Deep Analysis](docs/DEEP_ANALYSIS.md) | CFG, data flow, taint tracking |
266
+ | [Roadmap](docs/ROADMAP.md) | Development phases |
267
+
268
+ ---
269
+
270
+ ## Testing
271
+
272
+ ```bash
273
+ pytest tests/ -v # 137 passed, 22 skipped
274
+ ```
275
+
276
+ No external services required. Tests use temporary directories with real tree-sitter parsing and git operations.
277
+
278
+ ---
279
+
280
+ ## License
281
+
282
+ [MIT](LICENSE)