@phoenixaihub/graphrag 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (48) hide show
  1. package/.github/ISSUE_TEMPLATE/bug_report.md +17 -0
  2. package/.github/ISSUE_TEMPLATE/feature_request.md +12 -0
  3. package/.github/pull_request_template.md +5 -0
  4. package/.github/workflows/ci.yml +22 -0
  5. package/LICENSE +21 -0
  6. package/README.md +103 -0
  7. package/dist/cli.d.ts +3 -0
  8. package/dist/cli.d.ts.map +1 -0
  9. package/dist/cli.js +47 -0
  10. package/dist/cli.js.map +1 -0
  11. package/dist/commands/index-repo.d.ts +8 -0
  12. package/dist/commands/index-repo.d.ts.map +1 -0
  13. package/dist/commands/index-repo.js +79 -0
  14. package/dist/commands/index-repo.js.map +1 -0
  15. package/dist/commands/query.d.ts +7 -0
  16. package/dist/commands/query.d.ts.map +1 -0
  17. package/dist/commands/query.js +32 -0
  18. package/dist/commands/query.js.map +1 -0
  19. package/dist/commands/serve.d.ts +7 -0
  20. package/dist/commands/serve.d.ts.map +1 -0
  21. package/dist/commands/serve.js +65 -0
  22. package/dist/commands/serve.js.map +1 -0
  23. package/dist/db.d.ts +46 -0
  24. package/dist/db.d.ts.map +1 -0
  25. package/dist/db.js +117 -0
  26. package/dist/db.js.map +1 -0
  27. package/dist/embeddings.d.ts +4 -0
  28. package/dist/embeddings.d.ts.map +1 -0
  29. package/dist/embeddings.js +35 -0
  30. package/dist/embeddings.js.map +1 -0
  31. package/dist/index.d.ts +6 -0
  32. package/dist/index.d.ts.map +1 -0
  33. package/dist/index.js +6 -0
  34. package/dist/index.js.map +1 -0
  35. package/dist/parser.d.ts +12 -0
  36. package/dist/parser.d.ts.map +1 -0
  37. package/dist/parser.js +152 -0
  38. package/dist/parser.js.map +1 -0
  39. package/package.json +54 -0
  40. package/src/cli.ts +52 -0
  41. package/src/commands/index-repo.ts +96 -0
  42. package/src/commands/query.ts +43 -0
  43. package/src/commands/serve.ts +90 -0
  44. package/src/db.ts +168 -0
  45. package/src/embeddings.ts +39 -0
  46. package/src/index.ts +5 -0
  47. package/src/parser.ts +162 -0
  48. package/tsconfig.json +19 -0
package/src/db.ts ADDED
@@ -0,0 +1,168 @@
1
+ import Database from 'better-sqlite3';
2
+ import path from 'path';
3
+ import fs from 'fs';
4
+
5
/**
 * One indexed source construct, stored as a row of the `entities` table.
 *
 * The parser in this package emits one `file` entity per parsed file plus
 * one entity per function, class and imported name it recognises.
 */
export interface Entity {
  /** Database row id; absent until the entity has been inserted. */
  id?: number;
  /** Path of the file the entity was found in. */
  filePath: string;
  /** Identifier of the entity (for `file` entities, the file's base name). */
  name: string;
  /** What sort of construct this entity is. */
  kind:
    | 'file'
    | 'function'
    | 'class'
    | 'method'
    | 'variable'
    | 'import'
    | 'export';
  /** 1-based line on which the entity starts. */
  startLine: number;
  /** 1-based line on which the entity ends. */
  endLine: number;
  /** Source text captured for the entity. */
  code: string;
  /** Language label assigned by the parser (e.g. `typescript`, `python`). */
  language: string;
}
15
+
16
/**
 * A directed relationship between two entities, stored as a row of the
 * `edges` table.
 */
export interface Edge {
  /** Database row id; absent until the edge has been inserted. */
  id?: number;
  /** `entities.id` of the entity the edge starts from. */
  sourceId: number;
  /** `entities.id` of the entity the edge points to. */
  targetId: number;
  /** How the source relates to the target. */
  relation:
    | 'contains'
    | 'imports'
    | 'exports'
    | 'calls'
    | 'extends'
    | 'implements';
}
22
+
23
/**
 * A piece of text attached to an entity, stored as a row of the `chunks`
 * table together with an optional embedding vector.
 */
export interface Chunk {
  /** Database row id; absent until the chunk has been inserted. */
  id?: number;
  /** `entities.id` of the entity this chunk belongs to. */
  entityId: number;
  /** The chunk's text. */
  content: string;
  /** Embedding vector, persisted as a BLOB; omitted or `null` when none exists. */
  embedding?: Float64Array | null;
}
29
+
30
/**
 * SQL executed once by the GraphDB constructor (via `db.exec`) to create the
 * three tables — `entities`, `edges`, `chunks` — and their lookup indexes.
 * Every statement uses IF NOT EXISTS, so running it against an already
 * initialised database file changes nothing.
 * `edges.source_id`, `edges.target_id` and `chunks.entity_id` are declared as
 * references to `entities(id)`; `chunks.embedding` is a nullable BLOB.
 */
+ const SCHEMA = `
31
+ CREATE TABLE IF NOT EXISTS entities (
32
+ id INTEGER PRIMARY KEY AUTOINCREMENT,
33
+ file_path TEXT NOT NULL,
34
+ name TEXT NOT NULL,
35
+ kind TEXT NOT NULL,
36
+ start_line INTEGER NOT NULL,
37
+ end_line INTEGER NOT NULL,
38
+ code TEXT NOT NULL,
39
+ language TEXT NOT NULL
40
+ );
41
+
42
+ CREATE TABLE IF NOT EXISTS edges (
43
+ id INTEGER PRIMARY KEY AUTOINCREMENT,
44
+ source_id INTEGER NOT NULL REFERENCES entities(id),
45
+ target_id INTEGER NOT NULL REFERENCES entities(id),
46
+ relation TEXT NOT NULL
47
+ );
48
+
49
+ CREATE TABLE IF NOT EXISTS chunks (
50
+ id INTEGER PRIMARY KEY AUTOINCREMENT,
51
+ entity_id INTEGER NOT NULL REFERENCES entities(id),
52
+ content TEXT NOT NULL,
53
+ embedding BLOB
54
+ );
55
+
56
+ CREATE INDEX IF NOT EXISTS idx_entities_name ON entities(name);
57
+ CREATE INDEX IF NOT EXISTS idx_entities_kind ON entities(kind);
58
+ CREATE INDEX IF NOT EXISTS idx_entities_file ON entities(file_path);
59
+ CREATE INDEX IF NOT EXISTS idx_edges_source ON edges(source_id);
60
+ CREATE INDEX IF NOT EXISTS idx_edges_target ON edges(target_id);
61
+ CREATE INDEX IF NOT EXISTS idx_chunks_entity ON chunks(entity_id);
62
+ `;
63
+
64
/** Shape of an `entities` row exactly as SQLite returns it (snake_case column names). */
interface EntityRow {
  id: number;
  file_path: string;
  name: string;
  kind: Entity['kind'];
  start_line: number;
  end_line: number;
  code: string;
  language: string;
}

/**
 * Thin wrapper around a better-sqlite3 database holding the code graph:
 * entities (nodes), edges (relationships) and chunks (text + embeddings).
 */
export class GraphDB {
  private readonly db: Database.Database;

  /**
   * Opens (creating if necessary) the database at `dbPath`, switches it to
   * WAL journaling and makes sure the schema exists.
   *
   * @param dbPath Path of the SQLite file; its parent directory is created
   *   when missing.
   */
  constructor(dbPath: string) {
    const dir = path.dirname(dbPath);
    if (!fs.existsSync(dir)) fs.mkdirSync(dir, { recursive: true });
    this.db = new Database(dbPath);
    this.db.pragma('journal_mode = WAL');
    this.db.exec(SCHEMA);
  }

  /**
   * Converts a raw row into an `Entity`.
   *
   * SQLite returns the column names (`file_path`, `start_line`, `end_line`),
   * not the camelCase property names declared on `Entity`, so a plain cast
   * leaves `filePath`/`startLine`/`endLine` undefined. The original columns
   * are kept on the object as well, so code that reads the snake_case keys
   * keeps working.
   */
  private static toEntity(row: EntityRow): Entity {
    return {
      ...row,
      filePath: row.file_path,
      startLine: row.start_line,
      endLine: row.end_line,
    };
  }

  /**
   * Inserts one entity row.
   *
   * @returns The id of the new row.
   */
  insertEntity(e: Entity): number {
    const stmt = this.db.prepare(
      'INSERT INTO entities (file_path, name, kind, start_line, end_line, code, language) VALUES (?, ?, ?, ?, ?, ?, ?)'
    );
    const result = stmt.run(e.filePath, e.name, e.kind, e.startLine, e.endLine, e.code, e.language);
    return Number(result.lastInsertRowid);
  }

  /**
   * Inserts one edge row.
   *
   * @returns The id of the new row.
   */
  insertEdge(e: Edge): number {
    const stmt = this.db.prepare(
      'INSERT INTO edges (source_id, target_id, relation) VALUES (?, ?, ?)'
    );
    const result = stmt.run(e.sourceId, e.targetId, e.relation);
    return Number(result.lastInsertRowid);
  }

  /**
   * Inserts one chunk row, storing the embedding (if any) as a BLOB of the
   * vector's raw bytes.
   *
   * @returns The id of the new row.
   */
  insertChunk(c: Chunk): number {
    const stmt = this.db.prepare(
      'INSERT INTO chunks (entity_id, content, embedding) VALUES (?, ?, ?)'
    );
    // A Float64Array may be a view onto part of a larger ArrayBuffer, so copy
    // only the bytes the view covers rather than the whole backing buffer.
    const embeddingBlob = c.embedding
      ? Buffer.from(c.embedding.buffer, c.embedding.byteOffset, c.embedding.byteLength)
      : null;
    const result = stmt.run(c.entityId, c.content, embeddingBlob);
    return Number(result.lastInsertRowid);
  }

  /** Looks up a single entity by row id; `undefined` when no such row exists. */
  getEntity(id: number): Entity | undefined {
    const row = this.db.prepare('SELECT * FROM entities WHERE id = ?').get(id) as EntityRow | undefined;
    return row ? GraphDB.toEntity(row) : undefined;
  }

  /**
   * Returns every entity whose name contains `name` (SQL `LIKE '%name%'`).
   * `%` and `_` inside `name` are not escaped and therefore act as LIKE
   * wildcards.
   */
  findEntitiesByName(name: string): Entity[] {
    const rows = this.db.prepare(
      'SELECT * FROM entities WHERE name LIKE ?'
    ).all(`%${name}%`) as EntityRow[];
    return rows.map(GraphDB.toEntity);
  }

  /**
   * Text search across entity names and code.
   *
   * The query is lower-cased and split on whitespace; an entity matches when
   * every term occurs in its lower-cased name or code. Results are ordered by
   * kind, then name.
   *
   * @param query Free-text query; an empty or whitespace-only query yields `[]`.
   * @param limit Maximum number of rows returned.
   */
  searchEntities(query: string, limit = 5): Entity[] {
    const terms = query.toLowerCase().split(/\s+/).filter(Boolean);
    if (terms.length === 0) return [];
    const conditions = terms.map(() => '(LOWER(name) LIKE ? OR LOWER(code) LIKE ?)').join(' AND ');
    const params = terms.flatMap(t => [`%${t}%`, `%${t}%`]);
    const rows = this.db.prepare(
      `SELECT * FROM entities WHERE ${conditions} ORDER BY kind, name LIMIT ?`
    ).all(...params, limit) as EntityRow[];
    return rows.map(GraphDB.toEntity);
  }

  /**
   * Returns the entities connected to `entityId` by an edge.
   *
   * @param entityId Row id of the entity to start from.
   * @param direction `outgoing` follows edges whose source is `entityId`,
   *   `incoming` follows edges whose target is `entityId`, `both` returns
   *   outgoing results followed by incoming ones.
   */
  getRelated(entityId: number, direction: 'outgoing' | 'incoming' | 'both' = 'both'): Array<{ entity: Entity; relation: string }> {
    const results: Array<{ entity: Entity; relation: string }> = [];
    const collect = (sql: string): void => {
      const rows = this.db.prepare(sql).all(entityId) as Array<EntityRow & { relation: string }>;
      for (const row of rows) {
        results.push({ entity: GraphDB.toEntity(row), relation: row.relation });
      }
    };
    if (direction === 'outgoing' || direction === 'both') {
      collect('SELECT e.*, ed.relation FROM entities e JOIN edges ed ON e.id = ed.target_id WHERE ed.source_id = ?');
    }
    if (direction === 'incoming' || direction === 'both') {
      collect('SELECT e.*, ed.relation FROM entities e JOIN edges ed ON e.id = ed.source_id WHERE ed.target_id = ?');
    }
    return results;
  }

  /**
   * Returns the distinct entities that are the source of a `calls` or
   * `imports` edge pointing at an entity named exactly `name`.
   */
  getUsages(name: string): Entity[] {
    const rows = this.db.prepare(
      `SELECT DISTINCT e2.* FROM entities e1
       JOIN edges ed ON e1.id = ed.target_id
       JOIN entities e2 ON e2.id = ed.source_id
       WHERE e1.name = ? AND ed.relation IN ('calls', 'imports')`
    ).all(name) as EntityRow[];
    return rows.map(GraphDB.toEntity);
  }

  /**
   * Removes everything indexed for `filePath`: its entities, every edge that
   * touches one of them, and their chunks.
   *
   * The three deletes run inside one transaction so a failure cannot leave
   * edges or chunks pointing at half-removed entities. Sub-selects replace a
   * generated `IN (?, ?, …)` list, so the statement never exceeds SQLite's
   * bound-variable limit however many entities the file has.
   */
  clearFile(filePath: string): void {
    const idsOfFile = 'SELECT id FROM entities WHERE file_path = ?';
    const removeAll = this.db.transaction((target: string) => {
      this.db.prepare(`DELETE FROM chunks WHERE entity_id IN (${idsOfFile})`).run(target);
      this.db.prepare(
        `DELETE FROM edges WHERE source_id IN (${idsOfFile}) OR target_id IN (${idsOfFile})`
      ).run(target, target);
      this.db.prepare('DELETE FROM entities WHERE file_path = ?').run(target);
    });
    removeAll(filePath);
  }

  /** Row counts for each table plus the number of distinct indexed files. */
  getStats(): { entities: number; edges: number; chunks: number; files: number } {
    const count = (sql: string): number => (this.db.prepare(sql).get() as { c: number }).c;
    return {
      entities: count('SELECT COUNT(*) as c FROM entities'),
      edges: count('SELECT COUNT(*) as c FROM edges'),
      chunks: count('SELECT COUNT(*) as c FROM chunks'),
      files: count('SELECT COUNT(DISTINCT file_path) as c FROM entities'),
    };
  }

  /** Closes the underlying database connection. */
  close(): void {
    this.db.close();
  }
}
package/src/embeddings.ts ADDED
@@ -0,0 +1,39 @@
1
+ import OpenAI from 'openai';
2
+
3
/** Module-wide OpenAI client; stays `null` until the first call to `getClient`. */
let client: OpenAI | null = null;

/**
 * Returns the shared OpenAI client, constructing it (with no arguments) the
 * first time it is requested and reusing that instance afterwards.
 */
function getClient(): OpenAI {
  client ??= new OpenAI();
  return client;
}
11
+
12
/**
 * Requests an embedding for a single piece of text from the
 * `text-embedding-3-small` model.
 *
 * @param text Text to embed; only its first 8000 characters are sent.
 * @returns The embedding vector of the first (only) item in the response.
 */
export async function getEmbedding(text: string): Promise<number[]> {
  const input = text.substring(0, 8000);
  const { data } = await getClient().embeddings.create({
    model: 'text-embedding-3-small',
    input,
  });
  return data[0].embedding;
}
20
+
21
/**
 * Requests embeddings for many texts from the `text-embedding-3-small` model.
 *
 * Each text is truncated to its first 8000 characters. An empty `texts` array
 * returns `[]` without contacting the API, and longer arrays are sent in
 * consecutive batches so no single request exceeds the API's cap on the
 * number of inputs.
 *
 * @param texts Texts to embed.
 * @returns One vector per input text, in input order.
 */
export async function getEmbeddings(texts: string[]): Promise<number[][]> {
  if (texts.length === 0) return [];

  // The embeddings endpoint accepts at most this many inputs per request.
  const maxInputsPerRequest = 2048;
  const openai = getClient();
  const vectors: number[][] = [];

  // Batches are awaited one after another on purpose: order must match the
  // input order and parallel requests would multiply rate-limit pressure.
  for (let offset = 0; offset < texts.length; offset += maxInputsPerRequest) {
    const batch = texts
      .slice(offset, offset + maxInputsPerRequest)
      .map(t => t.substring(0, 8000));
    const resp = await openai.embeddings.create({
      model: 'text-embedding-3-small',
      input: batch,
    });
    for (const item of resp.data) vectors.push(item.embedding);
  }
  return vectors;
}
30
+
31
/**
 * Cosine similarity of two vectors: `dot(a, b) / (|a| * |b|)`.
 *
 * The loop runs over the indices of `a`, so both vectors are expected to
 * have the same length.
 *
 * @returns A value in [-1, 1], or `0` when either vector has zero magnitude
 *   (including empty vectors), where the ratio would otherwise be `NaN`.
 */
export function cosineSimilarity(a: number[], b: number[]): number {
  let dot = 0, normA = 0, normB = 0;
  for (let i = 0; i < a.length; i++) {
    dot += a[i] * b[i];
    normA += a[i] * a[i];
    normB += b[i] * b[i];
  }
  const denominator = Math.sqrt(normA) * Math.sqrt(normB);
  // 0/0 is NaN, which would poison any ranking built on the result.
  if (denominator === 0) return 0;
  return dot / denominator;
}
package/src/index.ts ADDED
@@ -0,0 +1,5 @@
1
+ export { indexRepo } from './commands/index-repo.js';
2
+ export { queryCode } from './commands/query.js';
3
+ export { serve } from './commands/serve.js';
4
+ export { GraphDB } from './db.js';
5
+ export { parseFile } from './parser.js';
package/src/parser.ts ADDED
@@ -0,0 +1,162 @@
1
+ import fs from 'fs';
2
+ import path from 'path';
3
+ import type { Entity, Edge } from './db.js';
4
+
5
+ // Regex-based parser (no native tree-sitter dependency — works everywhere)
6
+
7
/**
 * What parsing one file produces: the entities found in it (not yet
 * inserted, hence without ids) and the relationships between them, expressed
 * by entity name because row ids do not exist yet.
 */
interface ParseResult {
  /** Entities discovered in the file. */
  entities: Array<Omit<Entity, 'id'>>;
  /** Name-based relationships between entities. */
  edges: Array<{
    sourceName: string;
    targetName: string;
    relation: Edge['relation'];
  }>;
}
11
+
12
/**
 * Per-language regexes that recognise top-level function declarations.
 * Every pattern is anchored at the start of a line (`^` with the `m` flag)
 * and captures the function name in group 1.
 */
const FUNCTION_PATTERNS: Record<string, RegExp[]> = {
  typescript: [
    // `function name`, optionally preceded by `export`, `default`, `async`.
    /^(?:export\s+)?(?:default\s+)?(?:async\s+)?function\s+(\w+)/gm,
    // `const name = (` — the start of a parenthesised arrow function or
    // function-like initializer.
    /^(?:export\s+)?const\s+(\w+)\s*=\s*(?:async\s+)?\(/gm,
    // `const name = (params) =>` or `const name = param =>`. The bare
    // parameter must be a whole identifier; a single-character class here
    // would miss every parameter name longer than one character.
    /^(?:export\s+)?const\s+(\w+)\s*=\s*(?:async\s+)?(?:\([^)]*\)|[\w$]+)\s*=>/gm,
  ],
  python: [
    /^def\s+(\w+)\s*\(/gm,
    /^async\s+def\s+(\w+)\s*\(/gm,
  ],
};
23
+
24
/**
 * Per-language regexes that recognise top-level class declarations.
 * Each pattern is line-anchored and captures the class name in group 1.
 */
const CLASS_PATTERNS: Record<string, RegExp[]> = {
  // Accepts `export`, `export default` and `abstract` prefixes.
  typescript: [/^(?:export\s+)?(?:default\s+)?(?:abstract\s+)?class\s+(\w+)/gm],
  python: [/^class\s+(\w+)/gm],
};
28
+
29
/**
 * Per-language regexes that recognise import statements.
 *
 * Capture-group layout (callers rely on these positions):
 * - typescript[0]: 1 = named-import list inside `{}`, 2 = default import
 *   name, 3 = module specifier.
 * - typescript[1]: 1 = namespace alias of `import * as`, 2 = module specifier.
 * - python[0]: 1 = module after `from`, 2 = everything after `import`.
 * - python[1]: 1 = first token after `import`.
 */
const IMPORT_PATTERNS: Record<string, RegExp[]> = {
  typescript: [
    /import\s+(?:{([^}]+)}|(\w+))\s+from\s+['"]([^'"]+)['"]/gm,
    /import\s+\*\s+as\s+(\w+)\s+from\s+['"]([^'"]+)['"]/gm,
  ],
  python: [
    /^from\s+(\S+)\s+import\s+(.+)$/gm,
    /^import\s+(\S+)/gm,
  ],
};
39
+
40
/**
 * Maps a file path to the parser's language label using its extension
 * (compared case-insensitively).
 *
 * @returns `typescript` for TypeScript and JavaScript extensions, `python`
 *   for `.py`, and `unknown` for everything else.
 */
function detectLanguage(filePath: string): string {
  switch (path.extname(filePath).toLowerCase()) {
    case '.ts':
    case '.tsx':
    case '.js':
    case '.jsx':
    case '.mjs':
    case '.cjs':
      return 'typescript';
    case '.py':
      return 'python';
    default:
      return 'unknown';
  }
}
46
+
47
/**
 * Converts a character offset into a 1-based line number by counting the
 * newline characters that precede it.
 *
 * @param source Full text.
 * @param index Character offset into `source`; offsets outside the text are
 *   clamped to its bounds.
 */
function getLineNumber(source: string, index: number): number {
  const stop = Math.min(Math.max(index, 0), source.length);
  let line = 1;
  for (let pos = 0; pos < stop; pos++) {
    if (source[pos] === '\n') line += 1;
  }
  return line;
}
50
+
51
/**
 * Net change in bracket depth contributed by one line of Python: opening
 * `(`, `[`, `{` count +1 and closing ones -1. Brackets inside single- or
 * double-quoted strings and after a `#` comment marker are ignored.
 */
function pythonBracketDelta(line: string): number {
  let delta = 0;
  let quote: string | null = null;
  for (let i = 0; i < line.length; i++) {
    const ch = line[i];
    if (quote !== null) {
      if (ch === '\\') i++; // skip the escaped character
      else if (ch === quote) quote = null;
      continue;
    }
    if (ch === '#') break;
    if (ch === '"' || ch === "'") quote = ch;
    else if (ch === '(' || ch === '[' || ch === '{') delta++;
    else if (ch === ')' || ch === ']' || ch === '}') delta--;
  }
  return delta;
}

/**
 * Finds the extent of the declaration that starts on `startLine`.
 *
 * Python: the block is the header (including a signature that spans several
 * lines) plus every following line indented deeper than the header; trailing
 * blank lines are not part of the block.
 *
 * Other languages: braces are counted from the start line until the first
 * opened brace is balanced again. A declaration that never opens a brace —
 * e.g. an expression-bodied arrow function — ends at the first line ending in
 * `;` or just before the first blank line, instead of running on into the
 * next braced block of the file. Braces inside strings and comments are
 * counted like any other; this is a heuristic, not a parser.
 *
 * @param lines The file split into lines.
 * @param startLine 1-based line on which the declaration starts.
 * @param language Language label; `python` selects indentation mode.
 * @returns `endLine` (1-based, inclusive) and the joined source text of
 *   lines `startLine..endLine`.
 */
function extractBlock(lines: string[], startLine: number, language: string): { endLine: number; code: string } {
  const start = startLine - 1;
  const slice = (end: number): { endLine: number; code: string } => ({
    endLine: end,
    code: lines.slice(start, end).join('\n'),
  });

  if (language === 'python') {
    const baseIndent = lines[start]?.search(/\S/) ?? 0;
    let end = start + 1;

    // Step over the continuation lines of a multi-line signature; they may be
    // indented no deeper than `def` (e.g. a closing `):`) and would otherwise
    // be mistaken for the end of the block.
    let depth = pythonBracketDelta(lines[start] ?? '');
    let probe = end;
    while (depth > 0 && probe < lines.length) {
      depth += pythonBracketDelta(lines[probe]);
      probe++;
    }
    // Only trust the scan when the brackets actually closed.
    if (depth <= 0) end = probe;

    while (end < lines.length) {
      const line = lines[end];
      if (line.trim() === '') { end++; continue; }
      if (line.search(/\S/) <= baseIndent) break;
      end++;
    }
    // Blank lines between this block and the next statement belong to neither.
    while (end > start + 1 && lines[end - 1].trim() === '') end--;
    return slice(end);
  }

  // Brace-based
  let depth = 0;
  let opened = false;
  let terminated = false;
  let end = start;
  for (let i = start; i < lines.length; i++) {
    const line = lines[i];
    // No brace yet and a blank line: the declaration has no braced body.
    if (!opened && i > start && line.trim() === '') { terminated = true; break; }
    for (const ch of line) {
      if (ch === '{') { depth++; opened = true; }
      if (ch === '}') depth--;
    }
    end = i + 1;
    const finished = opened ? depth <= 0 : line.trimEnd().endsWith(';');
    if (finished) { terminated = true; break; }
  }
  // Ran to the end of the file without finding a body: keep just the start line.
  if (!opened && !terminated) end = Math.min(start + 1, lines.length);
  return slice(end);
}
82
+
83
/**
 * Extracts entities and name-based edges from one source file using the
 * regex tables above (no real parser involved).
 *
 * The result always starts with a `file` entity (its `code` is the first 500
 * characters of the file), followed by functions, classes and imports in
 * that order. Each function or class gets a `contains` edge from the file;
 * each imported name gets an `imports` edge from the file.
 *
 * Several function patterns can match the same declaration (for example
 * `const f = (x) => …` satisfies two of the TypeScript patterns), so a
 * declaration already recorded for a given name and start line is skipped
 * rather than emitted twice.
 *
 * @param filePath Path of the file; its extension selects the language.
 * @param source File contents; read from `filePath` as UTF-8 when omitted.
 * @returns Entities and edges, both empty for unsupported file types.
 */
export function parseFile(filePath: string, source?: string): ParseResult {
  const content = source ?? fs.readFileSync(filePath, 'utf-8');
  const language = detectLanguage(filePath);
  if (language === 'unknown') return { entities: [], edges: [] };

  const lines = content.split('\n');
  const fileName = path.basename(filePath);
  const entities: Omit<Entity, 'id'>[] = [];
  const edgesList: ParseResult['edges'] = [];

  // File entity
  entities.push({
    filePath, name: fileName, kind: 'file',
    startLine: 1, endLine: lines.length, code: content.substring(0, 500),
    language,
  });

  // Records every declaration matched by `patterns` as an entity of `kind`
  // contained in the file, skipping repeats of the same name on the same line.
  const collectDeclarations = (kind: 'function' | 'class', patterns: RegExp[]): void => {
    const seen = new Set<string>();
    for (const pattern of patterns) {
      // Fresh regex per pass: the shared table entries are global regexes and
      // carry `lastIndex` state.
      const regex = new RegExp(pattern.source, pattern.flags);
      let match: RegExpExecArray | null;
      while ((match = regex.exec(content)) !== null) {
        const name = match[1];
        const startLine = getLineNumber(content, match.index);
        const key = `${name}:${startLine}`;
        if (seen.has(key)) continue;
        seen.add(key);

        const block = extractBlock(lines, startLine, language);
        entities.push({
          filePath, name, kind,
          startLine, endLine: block.endLine, code: block.code,
          language,
        });
        edgesList.push({ sourceName: fileName, targetName: name, relation: 'contains' });
      }
    }
  };

  collectDeclarations('function', FUNCTION_PATTERNS[language] ?? []);
  collectDeclarations('class', CLASS_PATTERNS[language] ?? []);

  // Imports
  for (const pattern of IMPORT_PATTERNS[language] ?? []) {
    const regex = new RegExp(pattern.source, pattern.flags);
    let match: RegExpExecArray | null;
    while ((match = regex.exec(content)) !== null) {
      const startLine = getLineNumber(content, match.index);
      // The imported-name list sits in a different capture group per language.
      const rawNames = language === 'typescript'
        ? (match[1] ?? match[2] ?? '')
        : (match[2] ?? match[1] ?? '');
      const importedNames = rawNames.split(',').map(s => s.trim()).filter(Boolean);

      for (const imported of importedNames) {
        // Keep the original name of `x as y`, dropping the local alias.
        const name = imported.replace(/\s+as\s+\w+/, '');
        entities.push({
          filePath, name, kind: 'import',
          startLine, endLine: startLine, code: match[0],
          language,
        });
        edgesList.push({ sourceName: fileName, targetName: name, relation: 'imports' });
      }
    }
  }

  return { entities, edges: edgesList };
}
package/tsconfig.json ADDED
@@ -0,0 +1,19 @@
1
+ {
2
+ "compilerOptions": {
3
+ "target": "ES2022",
4
+ "module": "Node16",
5
+ "moduleResolution": "Node16",
6
+ "outDir": "./dist",
7
+ "rootDir": "./src",
8
+ "strict": true,
9
+ "esModuleInterop": true,
10
+ "skipLibCheck": true,
11
+ "forceConsistentCasingInFileNames": true,
12
+ "declaration": true,
13
+ "declarationMap": true,
14
+ "sourceMap": true,
15
+ "resolveJsonModule": true
16
+ },
17
+ "include": ["src/**/*.ts"],
18
+ "exclude": ["node_modules", "dist"]
19
+ }