nelgraph 1.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
nelgraph/__init__.py ADDED
@@ -0,0 +1,185 @@
1
+ """
2
+ GraphRAG Knowledge Base — Internal Python Module
3
+
4
+ Cách dùng nhanh nhất:
5
+
6
+ import nelgraph as graphrag
7
+ graphrag.configure(codebase_path="/path/to/project", openrouter_api_key="sk-...")
8
+ graphrag.run_init()
9
+
10
+ ctx = graphrag.get_function_context("processOrder")
11
+ snap = graphrag.get_snapshot()
12
+ changes = graphrag.get_changes("abc123f")
13
+ graphrag.mark_tested("processOrder")
14
+ """
15
+
16
+ # --- Public API ---
17
+ from nelgraph.knowledge_base import (
18
+ get_function_context,
19
+ get_snapshot,
20
+ get_changes,
21
+ mark_tested,
22
+ search,
23
+ run_init,
24
+ run_sync,
25
+ )
26
+
27
+ __version__ = "1.0.0"
28
+
29
+ __all__ = [
30
+ "configure",
31
+ "get_function_context",
32
+ "get_snapshot",
33
+ "get_changes",
34
+ "mark_tested",
35
+ "search",
36
+ "run_init",
37
+ "run_sync",
38
+ ]
39
+
40
+
41
def configure(
    codebase_path: str = None,
    openrouter_api_key: str = None,
    neo4j_uri: str = None,
    neo4j_password: str = None,
    neo4j_user: str = None,
    llm_model: str = None,
    embedding_model: str = None,
    embedding_dimensions: int = None,
):
    """
    Configure nelgraph from code instead of a .env file.

    Call this BEFORE using any other function. Every argument is optional;
    a falsy value (None, "", 0) leaves the corresponding setting untouched.

    Args:
        codebase_path: Path to the codebase to analyze (made absolute here).
        openrouter_api_key: OpenRouter API key.
        neo4j_uri: Neo4j connection URI.
        neo4j_password: Neo4j password.
        neo4j_user: Neo4j username.
        llm_model: OpenRouter model ID used for LLM enrichment.
        embedding_model: OpenRouter model ID used for embeddings.
        embedding_dimensions: Embedding vector size.

    Example:
        nelgraph.configure(
            codebase_path="/home/user/opensourcepos",
            openrouter_api_key="sk-or-...",
        )
    """
    import os
    import nelgraph.config as _cfg

    if codebase_path:
        codebase_path = os.path.abspath(codebase_path).replace("\\", "/")
        _cfg.CODEBASE_PATH = codebase_path
        os.environ["CODEBASE_PATH"] = codebase_path

        # Every data location is derived from the codebase path, so recompute them all.
        data_dir = os.path.join(codebase_path, ".graphrag_data").replace("\\", "/")
        _cfg.GRAPHRAG_DATA_DIR = data_dir
        _cfg.NEO4J_DATA_DIR = os.path.join(data_dir, "neo4j", "data").replace("\\", "/")
        _cfg.NEO4J_LOGS_DIR = os.path.join(data_dir, "neo4j", "logs").replace("\\", "/")
        _cfg.CHROMA_PATH = os.path.join(data_dir, "chromadb").replace("\\", "/")
        _cfg.SYNC_STATE_PATH = os.path.join(data_dir, "sync_state.json").replace("\\", "/")

    if openrouter_api_key:
        _cfg.OPENROUTER_API_KEY = openrouter_api_key
        os.environ["OPENROUTER_API_KEY"] = openrouter_api_key
        # Drop the cached AI clients so they are rebuilt with the new key.
        _reset_ai_clients()

    if neo4j_uri:
        _cfg.NEO4J_URI = neo4j_uri
        os.environ["NEO4J_URI"] = neo4j_uri

    if neo4j_user:
        _cfg.NEO4J_USER = neo4j_user
        os.environ["NEO4J_USER"] = neo4j_user

    if neo4j_password:
        _cfg.NEO4J_PASSWORD = neo4j_password
        os.environ["NEO4J_PASSWORD"] = neo4j_password

    if neo4j_uri or neo4j_user or neo4j_password:
        # Any change to the connection settings invalidates the cached client,
        # not only a password change; clear the singleton so it is recreated.
        import nelgraph.graph.neo4j_client as _nc
        _nc._client = None

    if llm_model:
        _cfg.LLM_MODEL = llm_model

    if embedding_model:
        _cfg.EMBEDDING_MODEL = embedding_model

    if embedding_dimensions:
        _cfg.EMBEDDING_DIMENSIONS = embedding_dimensions
115
+
116
+
117
+ def _reset_ai_clients():
118
+ """Reset tất cả lazy OpenAI client singletons để pick up config mới."""
119
+ try:
120
+ import nelgraph.embeddings.embedder as _emb
121
+ _emb._openai_client = None
122
+ except Exception:
123
+ pass
124
+ try:
125
+ import nelgraph.extractors.testing_enricher as _te
126
+ _te._client_ai = None
127
+ except Exception:
128
+ pass
129
+ try:
130
+ import nelgraph.extractors.llm_extractor as _le
131
+ _le._client = None
132
+ except Exception:
133
+ pass
134
+ try:
135
+ import nelgraph.community.summarizer as _sm
136
+ _sm._client_ai = None
137
+ except Exception:
138
+ pass
139
+
140
+
141
def status() -> dict:
    """
    Return the current state of the graph.

    Neo4j does not have to be running: any error while querying it is
    swallowed and the report keeps ``"neo4j": "offline"`` with zero counts.

    Returns:
        {
            "neo4j": "connected" | "offline",
            "codebase_path": "...",
            "last_sync": "...",
            "total_functions": 0,
            "enriched_functions": 0,
        }
    """
    import nelgraph.config as _cfg
    from nelgraph.initialize_graph import _load_sync_state

    # Start from the "offline" report and fill in whatever can be determined.
    report = {
        "neo4j": "offline",
        "codebase_path": _cfg.CODEBASE_PATH,
        "last_sync": None,
        "total_functions": 0,
        "enriched_functions": 0,
    }

    saved_state = _load_sync_state()
    if saved_state:
        report["last_sync"] = saved_state.get("last_sync_time")

    try:
        from nelgraph.graph.neo4j_client import get_client

        rows = get_client().run("""
            OPTIONAL MATCH (f:Function) WITH count(f) as total
            OPTIONAL MATCH (f2:Function) WHERE f2.how_it_works IS NOT NULL
            RETURN total, count(f2) as enriched
        """)
        if rows:
            report["neo4j"] = "connected"
            report["total_functions"] = rows[0]["total"]
            report["enriched_functions"] = rows[0]["enriched"]
    except Exception:
        # Unreachable or failing database: keep the offline defaults.
        pass

    return report
nelgraph/cli.py ADDED
@@ -0,0 +1,144 @@
1
+ import click
2
+ from rich.console import Console
3
+ import os
4
+ import sys
5
+ import requests
6
+ import nelgraph
7
+
8
# On Windows, switch both standard streams to UTF-8 — presumably so the
# non-ASCII characters this CLI prints (e.g. "✓", "→") can be encoded.
if sys.platform.startswith("win"):
    try:
        sys.stdout.reconfigure(encoding="utf-8")
        sys.stderr.reconfigure(encoding="utf-8")
    except Exception:
        # Best effort: if a stream cannot be reconfigured it is left as it is.
        pass

# Shared Rich console used for the CLI's formatted output.
console = Console()
16
+
17
def _version_key(version: str):
    """Return a comparable tuple for a purely numeric dotted version, else None."""
    parts = version.strip().split(".")
    if not all(part.isascii() and part.isdigit() for part in parts):
        return None
    key = [int(part) for part in parts]
    # "1.0" and "1.0.0" are the same release: ignore trailing zero components.
    while len(key) > 1 and key[-1] == 0:
        key.pop()
    return tuple(key)


def _check_for_updates():
    """
    Print a hint when PyPI offers a newer nelgraph release than the installed one.

    Purely best effort: any failure (no network, timeout, unexpected payload)
    is ignored so the update check can never break a CLI command.
    """
    try:
        # Short timeout so an unreachable PyPI does not block the CLI.
        res = requests.get("https://pypi.org/pypi/nelgraph/json", timeout=2)
        latest = res.json()["info"]["version"]
        from nelgraph import __version__

        latest_key = _version_key(latest)
        current_key = _version_key(__version__)
        if latest_key is not None and current_key is not None:
            # Only announce an update when PyPI is actually ahead; a newer
            # local/development install must not be told to "upgrade".
            update_available = latest_key > current_key
        else:
            # Non-numeric versions (pre-releases etc.): fall back to inequality.
            update_available = latest != __version__

        if update_available:
            console.print(
                f"[yellow]Update available: {__version__} → {latest}[/yellow]\n"
                f"Run: [bold]pip install --upgrade nelgraph[/bold]"
            )
    except Exception:
        pass
30
+
31
@click.group()
def main():
    """nelgraph — Codebase Knowledge Graph & Semantic Search CLI"""
    # The docstring above is what click displays as the group's help text.
    # Best-effort check for a newer release; failures inside it are ignored.
    _check_for_updates()
35
+
36
+ def _install_git_hook(codebase_path: str):
37
+ git_dir = os.path.join(codebase_path, ".git")
38
+ if not os.path.exists(git_dir):
39
+ return
40
+ hooks_dir = os.path.join(git_dir, "hooks")
41
+ os.makedirs(hooks_dir, exist_ok=True)
42
+ hook_path = os.path.join(hooks_dir, "post-commit")
43
+
44
+ # Simple shell hook to run nelgraph sync in background silently
45
+ hook_content = """#!/bin/sh
46
+ # Auto-sync graph in background after commit
47
+ nelgraph sync --silent &
48
+ """
49
+ try:
50
+ with open(hook_path, "w", newline="\n", encoding="utf-8") as f:
51
+ f.write(hook_content)
52
+ # Make hook executable
53
+ import stat
54
+ st = os.stat(hook_path)
55
+ os.chmod(hook_path, st.st_mode | stat.S_IEXEC)
56
+ console.print("[green]✓ Git post-commit hook installed successfully.[/green]")
57
+ except Exception as e:
58
+ console.print(f"[yellow]⚠ Warning: Could not install Git post-commit hook: {e}[/yellow]")
59
+
60
def _merge_env_lines(lines, api_key, abs_path):
    """Return the .env *lines* with OPENROUTER_API_KEY and CODEBASE_PATH set to the given values."""
    managed = {
        "OPENROUTER_API_KEY": api_key,
        "CODEBASE_PATH": abs_path,
    }
    seen = set()
    merged = []
    for line in lines:
        stripped = line.strip()
        for key, value in managed.items():
            if stripped.startswith(f"{key}="):
                # Replace an existing assignment in place.
                merged.append(f"{key}={value}\n")
                seen.add(key)
                break
        else:
            merged.append(line)

    missing = [key for key in managed if key not in seen]
    # If the file did not end with a newline, terminate its last line first;
    # otherwise the first appended entry would be glued onto it.
    if missing and merged and not merged[-1].endswith("\n"):
        merged[-1] += "\n"
    merged.extend(f"{key}={managed[key]}\n" for key in missing)
    return merged


@main.command()
@click.option("--key", help="OpenRouter API key")
@click.option("--path", default=".", help="Path to codebase (default: current dir)")
def init(key, path):
    """
    Khởi tạo GraphRAG cho project hiện tại.
    Tạo .env, start Neo4j Docker, parse + enrich toàn bộ codebase.
    """
    # The docstring above is the command's help text shown by click, so it is
    # kept verbatim. English: "Initialize GraphRAG for the current project.
    # Creates .env, starts Neo4j Docker, parses + enriches the entire codebase."
    abs_path = os.path.abspath(path).replace("\\", "/")

    # Load env from the target path if a .env file exists there.
    from dotenv import load_dotenv
    env_path = os.path.join(abs_path, ".env")
    if os.path.exists(env_path):
        load_dotenv(env_path)

    # Key precedence: --key option, then environment, then interactive prompt.
    api_key = key or os.getenv("OPENROUTER_API_KEY")
    if not api_key:
        api_key = click.prompt("OpenRouter API key", hide_input=True)

    # Write or update .env in the target directory, preserving unrelated lines.
    lines = []
    if os.path.exists(env_path):
        with open(env_path, "r", encoding="utf-8") as f:
            lines = f.readlines()

    with open(env_path, "w", encoding="utf-8") as f:
        f.writelines(_merge_env_lines(lines, api_key, abs_path))

    console.print(f"[green]✓ Configured {env_path}[/green]")

    # Programmatically configure nelgraph
    nelgraph.configure(codebase_path=abs_path, openrouter_api_key=api_key)

    # Run full initialization pipeline
    nelgraph.run_init()

    # Install git post-commit hook
    _install_git_hook(abs_path)
117
+
118
@main.command()
@click.option("--silent", is_flag=True, help="Run silently without printing to stdout")
def sync(silent):
    """Sync thủ công — parse files đã thay đổi kể từ lần sync cuối."""
    # The docstring above is the command's help text shown by click, so it is
    # kept verbatim. English: "Manual sync — parse files changed since the last sync."
    if silent:
        # Discard all output for the rest of the process. One shared handle is
        # enough. UTF-8 with errors="replace" guarantees that printing
        # non-ASCII text can never raise UnicodeEncodeError, which a devnull
        # stream opened with the locale's default encoding could do.
        devnull = open(os.devnull, "w", encoding="utf-8", errors="replace")
        sys.stdout = devnull
        sys.stderr = devnull

    # Run sync pipeline
    nelgraph.run_sync()
129
+
130
@main.command()
def status():
    """Xem trạng thái graph hiện tại."""
    # The docstring above is the command's help text shown by click.
    # English: "Show the current graph status."
    # Imported lazily so the module is only loaded when this command runs;
    # the actual work is delegated to run_status (defined outside this file).
    from nelgraph.initialize_graph import run_status
    run_status()
136
+
137
@main.command()
def watch():
    """Chạy file watcher — tự sync khi có file thay đổi."""
    # The docstring above is the command's help text shown by click.
    # English: "Run the file watcher — auto-sync when files change."
    # Imported lazily so the watcher module is only loaded when this command
    # runs; the actual work is delegated to start_watcher (defined outside this file).
    from nelgraph.updater.watcher import start_watcher
    start_watcher()
142
+
143
# Allow running this module directly as a script in addition to calling main().
if __name__ == "__main__":
    main()
@@ -0,0 +1 @@
1
+ # GraphRAG Community Detection and Summarization Package
@@ -0,0 +1,68 @@
1
+ import networkx as nx
2
+ import igraph as ig
3
+ import leidenalg
4
+ from nelgraph.graph.neo4j_client import get_client
5
+
6
+
7
def build_networkx_graph() -> nx.Graph:
    """
    Copy the Neo4j graph into an undirected NetworkX graph for graph algorithms.

    Only nodes with a non-null ``name`` are included, and only relationships
    whose two endpoints both have one. Nodes are keyed by their Neo4j element
    id and carry ``name`` and ``label`` attributes (first label, or "Unknown");
    edges carry the relationship type as ``rel_type``.
    """
    db = get_client()
    graph = nx.Graph()

    # Nodes first, so every vertex gets its attributes.
    node_rows = db.run("MATCH (n) WHERE n.name IS NOT NULL RETURN elementId(n) as id, labels(n) as labels, n.name as name")
    for row in node_rows:
        node_labels = row["labels"]
        primary_label = node_labels[0] if node_labels else "Unknown"
        graph.add_node(row["id"], name=row["name"], label=primary_label)

    # Then the relationships between named nodes.
    edge_rows = db.run("MATCH (a)-[r]->(b) WHERE a.name IS NOT NULL AND b.name IS NOT NULL RETURN elementId(a) as from_id, elementId(b) as to_id, type(r) as rel_type")
    for row in edge_rows:
        graph.add_edge(row["from_id"], row["to_id"], rel_type=row["rel_type"])

    print(f"[Community] NetworkX graph: {graph.number_of_nodes()} nodes, {graph.number_of_edges()} edges")
    return graph
25
+
26
+
27
def detect_communities() -> dict:
    """
    Run the Leiden algorithm (igraph/leidenalg) on the graph built from Neo4j.

    Returns:
        Mapping of Neo4j node element id -> community id. Empty when the
        graph has no nodes.

    Side effects:
        Writes ``community_id`` onto the corresponding Neo4j nodes.
    """
    G = build_networkx_graph()
    if G.number_of_nodes() == 0:
        print("[Community] Empty graph, skipping community detection.")
        return {}

    # igraph addresses vertices by consecutive integers, so map each
    # NetworkX node (a Neo4j element id) to its position in nx_nodes.
    nx_nodes = list(G.nodes())
    node_index = {node: idx for idx, node in enumerate(nx_nodes)}

    ig_graph = ig.Graph()
    ig_graph.add_vertices(len(nx_nodes))
    # Insert all edges in a single call; adding them one by one with add_edge
    # is considerably slower on large graphs.
    ig_graph.add_edges([(node_index[u], node_index[v]) for u, v in G.edges()])

    # Run Leiden algorithm
    partition = leidenalg.find_partition(ig_graph, leidenalg.ModularityVertexPartition)

    # Build result mapping: neo4j_node_id -> community_id
    result = {}
    for community_id, members in enumerate(partition):
        for member_idx in members:
            result[nx_nodes[member_idx]] = community_id

    print(f"[Community] Detected {len(partition)} communities.")

    # Save community IDs to Neo4j in batches instead of one query per node.
    client = get_client()
    rows = [
        {"node_id": node_id, "community_id": community_id}
        for node_id, community_id in result.items()
    ]
    batch_size = 1000
    for start in range(0, len(rows), batch_size):
        client.run("""
            UNWIND $rows AS row
            MATCH (n) WHERE elementId(n) = row.node_id
            SET n.community_id = row.community_id
        """, {"rows": rows[start:start + batch_size]})

    return result
@@ -0,0 +1,208 @@
1
+ import openai
2
+ import re
3
+ from nelgraph.graph.neo4j_client import get_client
4
+
5
# Cached OpenAI-compatible client; created lazily by _get_client_ai().
_client_ai = None


def _get_client_ai():
    """Return the shared OpenAI client, building it from nelgraph.config on first use."""
    global _client_ai
    if _client_ai is not None:
        return _client_ai

    # Config is imported at call time, so the values current at first use apply.
    import nelgraph.config as settings
    _client_ai = openai.OpenAI(
        api_key=settings.OPENROUTER_API_KEY,
        base_url=settings.OPENROUTER_BASE_URL,
    )
    return _client_ai
16
+
17
+
18
def get_community_members(community_id: int) -> list[dict]:
    """
    Return up to 50 named nodes that belong to the given community.

    Args:
        community_id: Value of the ``community_id`` node property to match.

    Returns:
        One dict per node with keys ``type`` (the node's first label, or
        "Unknown" when it has none), ``name`` and ``description``.
    """
    client = get_client()
    rows = client.run("""
        MATCH (n) WHERE n.community_id = $cid AND n.name IS NOT NULL
        RETURN labels(n) as labels, n.name as name, n.description as description
        LIMIT 50
    """, {"cid": community_id})

    return [
        {
            # Unlabeled nodes would make labels[0] raise IndexError; use the
            # same "Unknown" fallback as the community graph builder.
            "type": row["labels"][0] if row["labels"] else "Unknown",
            "name": row["name"],
            "description": row["description"],
        }
        for row in rows
    ]
28
+
29
+
30
def summarize_community(community_id: int) -> str:
    """
    Ask the LLM for a brief summary of one community.

    Args:
        community_id: Community whose members are summarized.

    Returns:
        The stripped summary text, or "" when the community has no members or
        the model returns no content.
    """
    members = get_community_members(community_id)
    if not members:
        return ""

    # Only the first 30 members are included in the prompt.
    members_text = "\n".join([f"- [{m['type']}] {m['name']}: {m['description'] or ''}" for m in members[:30]])

    prompt = f"""You are summarizing a cluster of related code elements for a developer knowledge graph.

Community members:
{members_text}

Write a 2-3 sentence summary of this community that answers:
1. What is the main purpose/theme of this group?
2. What are the key elements?
3. Any notable risks, tasks, or decisions?

Keep it under 200 words. Be specific, not generic."""

    import nelgraph.config as config
    response = _get_client_ai().chat.completions.create(
        model=config.LLM_MODEL,
        max_tokens=300,
        messages=[{"role": "user", "content": prompt}]
    )

    # message.content can be None; treat that as an empty summary instead of
    # failing with AttributeError on .strip().
    return (response.choices[0].message.content or "").strip()
58
+
59
+
60
def infer_community_name(community_id: int, summary: str) -> str:
    """
    Ask the LLM for a short (2-4 word) name for a community.

    Args:
        community_id: Not used by the request itself; kept for interface
            compatibility with callers.
        summary: Community summary the name is derived from.

    Returns:
        The stripped name, or "" when the model returns no content.
    """
    import nelgraph.config as config
    response = _get_client_ai().chat.completions.create(
        model=config.LLM_MODEL,
        max_tokens=20,
        messages=[{"role": "user", "content": f"Give a 2-4 word name for this code community. Return ONLY the name:\n\n{summary}"}]
    )
    # message.content can be None; avoid AttributeError on .strip().
    return (response.choices[0].message.content or "").strip()
69
+
70
+
71
def _parse_name_and_summary(text: str, cid) -> tuple:
    """Split an LLM reply in ``NAME: ... / SUMMARY: ...`` format into ``(name, summary)``."""
    default_name = f"Community {cid}"

    name = default_name
    name_match = re.search(r"NAME:\s*(.*)", text, re.IGNORECASE)
    if name_match:
        # Strip surrounding quotes/markdown decoration; if nothing is left,
        # keep the generic name rather than storing an empty one.
        name = name_match.group(1).strip().strip('"\'*` ') or default_name

    summary_match = re.search(r"SUMMARY:\s*([\s\S]*)", text, re.IGNORECASE)
    if summary_match:
        summary = summary_match.group(1).strip()
    else:
        # No SUMMARY marker: use every non-empty line except the NAME line.
        kept = [line.strip() for line in text.split("\n") if line.strip()]
        summary = " ".join(line for line in kept if not line.upper().startswith("NAME:"))

    return name, summary or text


def _llm_name_and_summary(cid, members: list) -> tuple:
    """Ask the LLM to name and summarize one community; raises if the call fails or the reply is empty."""
    import nelgraph.config as config

    # Only the first 30 members are included in the prompt.
    members_text = "\n".join([f"- [{m['type']}] {m['name']}: {m['description'] or ''}" for m in members[:30]])
    prompt = f"""You are summarizing a cluster of related code elements for a developer knowledge graph.

Community members:
{members_text}

Task:
1. Write a 2-3 sentence summary of this community that describes its main purpose/theme, key elements, and any notable risks, tasks, or decisions.
2. Provide a short, 2-4 word name for this community.

Return your response in the following format:
NAME: <your 2-4 word name>
SUMMARY: <your 2-3 sentence summary>"""

    response = _get_client_ai().chat.completions.create(
        model=config.LLM_MODEL,
        max_tokens=350,
        messages=[{"role": "user", "content": prompt}]
    )
    # message.content can be None; an empty reply is treated as a failure so
    # the caller falls back to its generic name and summary.
    text = (response.choices[0].message.content or "").strip()
    if not text:
        raise ValueError("LLM returned an empty response")
    return _parse_name_and_summary(text, cid)


def summarize_all_communities():
    """
    Summarize every community and store the result in Neo4j and ChromaDB.

    Communities with fewer than three members get a templated name and
    summary; larger ones are named and summarized by the LLM, with a generic
    fallback when the LLM step fails. For each community a ``Community`` node
    is merged in Neo4j and linked from its members via ``BELONGS_TO``. The
    ``community_summaries`` Chroma collection is recreated and filled with the
    embedded summaries.
    """
    client = get_client()

    import chromadb
    from nelgraph.config import CHROMA_PATH
    from nelgraph.embeddings.embedder import embed_texts

    chroma = chromadb.PersistentClient(path=CHROMA_PATH)
    try:
        chroma.delete_collection("community_summaries")
    except Exception:
        # Deleting is best effort (e.g. the collection may not exist yet).
        pass
    comm_collection = chroma.get_or_create_collection("community_summaries")

    # Get list of community IDs
    result = client.run("MATCH (n) WHERE n.community_id IS NOT NULL RETURN DISTINCT n.community_id as cid ORDER BY cid")
    community_ids = [r["cid"] for r in result]

    print(f"[Community] Summarizing {len(community_ids)} communities...")

    llm_count = 0
    auto_count = 0

    batch_ids, batch_docs, batch_metas = [], [], []

    for cid in community_ids:
        members = get_community_members(cid)
        if not members:
            continue

        size = len(members)
        if size < 3:
            # Auto summarization for small communities
            if size == 1:
                name = f"Node: {members[0]['name']}"
                summary = f"Isolated cluster containing element: {members[0]['name']} ({members[0]['type']})."
            else:
                name = f"Pair: {members[0]['name']} & {members[1]['name']}"
                summary = f"Small cluster containing elements: {members[0]['name']} ({members[0]['type']}) and {members[1]['name']} ({members[1]['type']})."
            auto_count += 1
        else:
            # LLM summarization for significant communities
            try:
                name, summary = _llm_name_and_summary(cid, members)
            except Exception as e:
                # Keep going with a generic description, but report the failure
                # instead of hiding it.
                print(f"  [Community] LLM summarization failed for community {cid}: {e}")
                name = f"Community {cid}"
                summary = f"Cluster of related code elements including {members[0]['name']}."
            llm_count += 1

        # Create Community node in Neo4j
        client.run("""
            MERGE (c:Community {id: $cid})
            SET c.name = $name, c.summary = $summary
        """, {"cid": cid, "name": name, "summary": summary})

        # Create BELONGS_TO edges
        client.run("""
            MATCH (c:Community {id: $cid})
            MATCH (n) WHERE n.community_id = $cid AND NOT n:Community
            MERGE (n)-[:BELONGS_TO]->(c)
        """, {"cid": cid})

        # Queue for ChromaDB embedding
        batch_ids.append(str(cid))
        batch_docs.append(summary)
        batch_metas.append({"id": cid, "name": name})

        if size < 3:
            print(f"  Community {cid} (Auto): '{name}'")
        else:
            print(f"  Community {cid} (LLM): '{name}'")

    # Embed in slices of 50 documents, then upsert everything in one call.
    if batch_docs:
        print(f"[Chroma] Embedding {len(batch_docs)} community summaries...")
        try:
            all_vectors = []
            for i in range(0, len(batch_docs), 50):
                all_vectors.extend(embed_texts(batch_docs[i:i + 50]))

            comm_collection.upsert(
                ids=batch_ids,
                documents=batch_docs,
                metadatas=batch_metas,
                embeddings=all_vectors
            )
            print("[Chroma] All community summaries embedded.")
        except Exception as e:
            print(f"[Chroma] Error embedding community summaries: {e}")

    print(f"[Community] Summarization done. LLM calls: {llm_count}, Auto: {auto_count}.")
nelgraph/config.py ADDED
@@ -0,0 +1,79 @@
1
+ import os
2
+ from dotenv import load_dotenv
3
+
4
# Load .env from the current working directory first, then from the directory
# containing this config module as a fallback.
load_dotenv()
config_dir = os.path.dirname(os.path.abspath(__file__))
load_dotenv(os.path.join(config_dir, ".env"))

PROJECT_NAME = os.getenv("PROJECT_NAME", "GraphRAG-Project")

# Target project to index: CODEBASE_PATH from the environment, or the current
# working directory when unset/empty. Relative paths are resolved against the
# current working directory; separators are normalized to forward slashes.
CODEBASE_PATH = os.getenv("CODEBASE_PATH") or "."
if not os.path.isabs(CODEBASE_PATH):
    CODEBASE_PATH = os.path.abspath(CODEBASE_PATH)
CODEBASE_PATH = CODEBASE_PATH.replace("\\", "/")

# GraphRAG data directory: defaults to .graphrag_data/ inside the target codebase.
GRAPHRAG_DATA_DIR = (
    os.getenv("GRAPHRAG_DATA_DIR") or os.path.join(CODEBASE_PATH, ".graphrag_data")
).replace("\\", "/")

# File extension -> language name, for the languages supported by AST parsing.
SUPPORTED_LANGUAGES = {
    ".py": "python",
    ".js": "javascript",
    ".ts": "typescript",
    ".jsx": "javascript",
    ".tsx": "typescript",
    ".php": "php",
}

# Directory names skipped during parsing.
IGNORE_DIRS = {
    ".git",
    "node_modules",
    "__pycache__",
    ".venv",
    "venv",
    "dist",
    "build",
    ".next",
    ".cursor",
    ".claude",
    ".codex",
    ".gemini",
    ".ai-log",
}
# Also skip the directory that contains this module, unless that directory is
# itself the codebase being indexed.
tool_dir_name = os.path.basename(config_dir)
if tool_dir_name and os.path.abspath(CODEBASE_PATH) != os.path.abspath(config_dir):
    IGNORE_DIRS.add(tool_dir_name)
45
+
46
+ # OpenRouter API (used for both LLM and embeddings)
47
+ OPENROUTER_API_KEY = os.getenv("OPENROUTER_API_KEY")
48
+ OPENROUTER_BASE_URL = "https://openrouter.ai/api/v1"
49
+
50
+ # Model IDs on OpenRouter
51
+ LLM_MODEL = os.getenv("LLM_MODEL", "deepseek/deepseek-v4-flash")
52
+ EMBEDDING_MODEL = os.getenv("EMBEDDING_MODEL", "openai/text-embedding-3-large")
53
+ EMBEDDING_DIMENSIONS = int(os.getenv("EMBEDDING_DIMENSIONS", "512"))
54
+ ENRICH_MIN_COMPLEXITY = int(os.getenv("ENRICH_MIN_COMPLEXITY", "2"))
55
+
56
# Neo4j connection settings.
NEO4J_URI = os.getenv("NEO4J_URI", "bolt://127.0.0.1:7687")
import sys
if sys.platform == "win32":
    # On Windows, "localhost" in the URI is replaced by the explicit loopback address.
    NEO4J_URI = NEO4J_URI.replace("localhost", "127.0.0.1")
NEO4J_USER = os.getenv("NEO4J_USER", "neo4j")
NEO4J_PASSWORD = os.getenv("NEO4J_PASSWORD", "graphrag123")

# Neo4j data directories (for Docker volume mounts)
_neo4j_root = (GRAPHRAG_DATA_DIR, "neo4j")
NEO4J_DATA_DIR = os.path.join(*_neo4j_root, "data").replace("\\", "/")
NEO4J_LOGS_DIR = os.path.join(*_neo4j_root, "logs").replace("\\", "/")
del _neo4j_root

# ChromaDB storage path: CHROMA_PATH from the environment, or chromadb/ inside
# the GraphRAG data directory when unset/empty.
CHROMA_PATH = (
    os.getenv("CHROMA_PATH") or os.path.join(GRAPHRAG_DATA_DIR, "chromadb")
).replace("\\", "/")

# Sync state file for incremental updates
SYNC_STATE_PATH = os.path.join(GRAPHRAG_DATA_DIR, "sync_state.json").replace("\\", "/")

# GitHub (optional)
GITHUB_TOKEN = os.getenv("GITHUB_TOKEN", "")
GITHUB_REPO = os.getenv("GITHUB_REPO", "")  # format: "owner/repo"
@@ -0,0 +1 @@
1
+ # GraphRAG Core package