lattice-graph 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +391 -0
- package/package.json +56 -0
- package/src/commands/build.ts +208 -0
- package/src/commands/init.ts +111 -0
- package/src/commands/lint.ts +245 -0
- package/src/commands/populate.ts +224 -0
- package/src/commands/update.ts +175 -0
- package/src/config.ts +93 -0
- package/src/extract/extractor.ts +13 -0
- package/src/extract/parser.ts +117 -0
- package/src/extract/python/calls.ts +121 -0
- package/src/extract/python/extractor.ts +171 -0
- package/src/extract/python/frameworks.ts +142 -0
- package/src/extract/python/imports.ts +115 -0
- package/src/extract/python/symbols.ts +121 -0
- package/src/extract/tags.ts +77 -0
- package/src/extract/typescript/calls.ts +110 -0
- package/src/extract/typescript/extractor.ts +130 -0
- package/src/extract/typescript/imports.ts +71 -0
- package/src/extract/typescript/symbols.ts +252 -0
- package/src/graph/database.ts +95 -0
- package/src/graph/queries.ts +336 -0
- package/src/graph/writer.ts +147 -0
- package/src/main.ts +525 -0
- package/src/output/json.ts +79 -0
- package/src/output/text.ts +265 -0
- package/src/types/config.ts +32 -0
- package/src/types/graph.ts +87 -0
- package/src/types/lint.ts +21 -0
- package/src/types/result.ts +58 -0
|
@@ -0,0 +1,175 @@
|
|
|
1
|
+
import { Database } from "bun:sqlite";
|
|
2
|
+
import { existsSync, statSync } from "node:fs";
|
|
3
|
+
import { join, relative } from "node:path";
|
|
4
|
+
import type { Extractor } from "../extract/extractor.ts";
|
|
5
|
+
import { initTreeSitter } from "../extract/parser.ts";
|
|
6
|
+
import { createPythonExtractor } from "../extract/python/extractor.ts";
|
|
7
|
+
import { createTypeScriptExtractor } from "../extract/typescript/extractor.ts";
|
|
8
|
+
import { checkSchemaVersion } from "../graph/database.ts";
|
|
9
|
+
import {
|
|
10
|
+
deleteFileData,
|
|
11
|
+
insertEdges,
|
|
12
|
+
insertNodes,
|
|
13
|
+
insertTags,
|
|
14
|
+
insertUnresolved,
|
|
15
|
+
synthesizeEventEdges,
|
|
16
|
+
} from "../graph/writer.ts";
|
|
17
|
+
import type { LatticeConfig } from "../types/config.ts";
|
|
18
|
+
import type { ExtractionResult } from "../types/graph.ts";
|
|
19
|
+
import { err, isOk, ok, type Result } from "../types/result.ts";
|
|
20
|
+
|
|
21
|
+
/** Summary returned by a successful incremental update. */
type UpdateStats = {
  /** Number of source files that matched a configured extractor and were not excluded. */
  readonly totalFiles: number;
  /** Number of files whose graph data was re-extracted during this run. */
  readonly filesReindexed: number;
  /** Wall-clock duration of the update in milliseconds. */
  readonly durationMs: number;
};
|
|
27
|
+
|
|
28
|
+
/**
 * Performs an incremental update of the knowledge graph.
 * Only re-indexes files whose modification time is newer than the stored
 * `last_build` timestamp. Falls back to a full rebuild if >30% of files are dirty.
 *
 * The database handle is always closed before this function settles, and any
 * thrown error (including one from the full-rebuild fallback) is converted
 * into an `err` result instead of escaping as a rejection.
 *
 * @param projectRoot - Path to the project root
 * @param config - Lattice configuration
 * @returns Update statistics or an error message
 */
// @lattice:flow update
async function executeUpdate(
  projectRoot: string,
  config: LatticeConfig,
): Promise<Result<UpdateStats, string>> {
  const startTime = Date.now();
  const dbPath = join(projectRoot, ".lattice", "graph.db");

  if (!existsSync(dbPath)) {
    return err("No existing graph found. Run 'lattice build' first.");
  }

  try {
    const db = new Database(dbPath);

    // Close exactly once: the full-rebuild fallback closes early, the
    // `finally` below covers every other exit (including thrown errors).
    let dbOpen = true;
    const closeDb = (): void => {
      if (dbOpen) {
        dbOpen = false;
        db.close();
      }
    };

    try {
      const schemaCheck = checkSchemaVersion(db);
      if (!isOk(schemaCheck)) {
        return err(schemaCheck.error);
      }

      // Get last build time
      const lastBuildRow = db.query("SELECT value FROM meta WHERE key = 'last_build'").get() as {
        value: string;
      } | null;
      if (!lastBuildRow) {
        return err("No last_build timestamp found. Run 'lattice build' first.");
      }
      const lastBuild = new Date(lastBuildRow.value);
      // An unparseable timestamp yields an Invalid Date, against which every
      // `mtime > lastBuild` comparison is false; report it instead of
      // silently treating all files as unchanged.
      if (Number.isNaN(lastBuild.getTime())) {
        return err("Invalid last_build timestamp. Run 'lattice build' first.");
      }

      // Initialize extractors and index them by file extension
      await initTreeSitter();
      const extractors = await createExtractors(config);
      const extByExt = new Map<string, Extractor>();
      for (const ext of extractors) {
        for (const fileExt of ext.fileExtensions) {
          extByExt.set(fileExt, ext);
        }
      }

      // Scan all files, find changed ones.
      // NOTE(review): this function never removes graph data for files that
      // no longer exist on disk; confirm whether that is handled elsewhere.
      const sourceRoot = join(projectRoot, config.root);
      const glob = new Bun.Glob("**/*");
      const allFiles: string[] = [];
      const changedFiles: string[] = [];

      for await (const path of glob.scan({ cwd: sourceRoot, dot: false })) {
        const ext = `.${path.split(".").pop()}`;
        if (!extByExt.has(ext)) continue;
        if (isExcluded(path, config.exclude)) continue;
        allFiles.push(path);

        const fullPath = join(sourceRoot, path);
        const stat = statSync(fullPath);
        if (stat.mtime > lastBuild) {
          changedFiles.push(path);
        }
      }

      // Fall back to full rebuild if >30% changed
      if (changedFiles.length > allFiles.length * 0.3) {
        closeDb();
        const { executeBuild } = await import("./build.ts");
        // Awaited so a rejection is caught by the outer try/catch.
        const buildResult = await executeBuild(projectRoot, config);
        if (!isOk(buildResult)) {
          return err(buildResult.error);
        }
        return ok({
          totalFiles: allFiles.length,
          filesReindexed: allFiles.length,
          durationMs: Date.now() - startTime,
        });
      }

      // Re-extract changed files
      for (const file of changedFiles) {
        const ext = `.${file.split(".").pop()}`;
        const extractor = extByExt.get(ext);
        if (!extractor) continue;

        const fullPath = join(sourceRoot, file);
        const source = await Bun.file(fullPath).text();
        const relativePath = relative(projectRoot, fullPath);

        // Delete old data for this file
        deleteFileData(db, relativePath);

        // Re-extract
        const result: ExtractionResult = await extractor.extract(relativePath, source);
        insertNodes(db, result.nodes);
        insertEdges(db, result.edges);
        insertTags(db, result.tags);
        insertUnresolved(db, result.unresolved);
      }

      // Rebuild event edges
      synthesizeEventEdges(db);

      // Record the time this run STARTED rather than the time it finished:
      // a file modified while the update was running then still has an mtime
      // newer than last_build and is picked up by the next update.
      db.run("INSERT OR REPLACE INTO meta (key, value) VALUES ('last_build', ?)", [
        new Date(startTime).toISOString(),
      ]);

      return ok({
        totalFiles: allFiles.length,
        filesReindexed: changedFiles.length,
        durationMs: Date.now() - startTime,
      });
    } finally {
      closeDb();
    }
  } catch (error) {
    return err(`Update failed: ${error instanceof Error ? error.message : String(error)}`);
  }
}
|
|
152
|
+
|
|
153
|
+
/**
 * Builds one extractor per supported entry in `config.languages`.
 * Entries other than "python" and "typescript" are ignored.
 */
async function createExtractors(config: LatticeConfig): Promise<readonly Extractor[]> {
  const created: Extractor[] = [];
  for (const language of config.languages) {
    switch (language) {
      case "python":
        created.push(await createPythonExtractor());
        break;
      case "typescript":
        created.push(await createTypeScriptExtractor());
        break;
      default:
        break;
    }
  }
  return created;
}
|
|
166
|
+
|
|
167
|
+
/**
 * Checks if a file path matches any exclude pattern.
 *
 * Matching is a plain substring test: every `*` is stripped from the pattern
 * and the remainder must occur somewhere in `filePath`. All wildcard
 * characters are removed (not just the first `**` and the first `*`), so
 * patterns with wildcards on both sides such as `**\/node_modules/**` match.
 *
 * @param filePath - Path to test
 * @param excludePatterns - Exclude patterns, optionally containing `*` wildcards
 * @returns true when at least one pattern's literal part occurs in the path
 */
function isExcluded(filePath: string, excludePatterns: readonly string[]): boolean {
  for (const pattern of excludePatterns) {
    // A string-pattern `replace` only removes the first occurrence; use a
    // global regex so no stray `*` is left in the fragment.
    const fragment = pattern.replace(/\*/g, "");
    if (filePath.includes(fragment)) return true;
  }
  return false;
}
|
|
174
|
+
|
|
175
|
+
export { executeUpdate, type UpdateStats };
|
package/src/config.ts
ADDED
|
@@ -0,0 +1,93 @@
|
|
|
1
|
+
import { parse as parseToml } from "smol-toml";
|
|
2
|
+
import type { LatticeConfig, LintConfig, PythonConfig, TypeScriptConfig } from "./types/config.ts";
|
|
3
|
+
import { err, ok, type Result } from "./types/result.ts";
|
|
4
|
+
|
|
5
|
+
// Exclude list used by parseConfig when `project.exclude` is not an array of strings.
const DEFAULT_EXCLUDE = ["node_modules", "venv", ".git", "dist", "__pycache__", ".lattice"];
|
|
6
|
+
|
|
7
|
+
/**
 * Turns the raw text of a lattice.toml file into a validated LatticeConfig.
 *
 * Fails when the text is not valid TOML, when there is no `[project]` table,
 * or when `project.languages` is not a non-empty array of strings. `root`
 * defaults to "." and `exclude` to DEFAULT_EXCLUDE. The `[python]` and
 * `[typescript]` sections are only parsed when the matching language is listed.
 *
 * @param tomlString - Raw contents of a lattice.toml file
 * @returns Parsed and validated config, or an error message
 */
function parseConfig(tomlString: string): Result<LatticeConfig, string> {
  let document: Record<string, unknown>;
  try {
    document = parseToml(tomlString) as Record<string, unknown>;
  } catch {
    return err("Invalid TOML syntax");
  }

  const { project } = document;
  if (!isRecord(project)) {
    return err("Missing [project] section");
  }

  const { languages } = project;
  if (!isStringArray(languages) || languages.length === 0) {
    return err("project.languages must be a non-empty array of strings");
  }

  const wantsPython = languages.includes("python");
  const wantsTypeScript = languages.includes("typescript");

  return ok({
    languages,
    root: typeof project.root === "string" ? project.root : ".",
    exclude: isStringArray(project.exclude) ? project.exclude : DEFAULT_EXCLUDE,
    python: wantsPython ? parsePythonSection(document.python) : undefined,
    typescript: wantsTypeScript ? parseTypeScriptSection(document.typescript) : undefined,
    lint: parseLintSection(document.lint),
  });
}
|
|
52
|
+
|
|
53
|
+
/**
 * Reads the [python] table, substituting defaults for absent or mistyped fields:
 * `source_roots` → ["."], `test_paths` → ["tests"], `frameworks` → [].
 * A value that is not a table is treated as an empty table.
 */
function parsePythonSection(raw: unknown): PythonConfig {
  const table: Record<string, unknown> = isRecord(raw) ? raw : {};
  const { source_roots: sourceRoots, test_paths: testPaths, frameworks } = table;
  return {
    sourceRoots: isStringArray(sourceRoots) ? sourceRoots : ["."],
    testPaths: isStringArray(testPaths) ? testPaths : ["tests"],
    frameworks: isStringArray(frameworks) ? frameworks : [],
  };
}
|
|
62
|
+
|
|
63
|
+
/**
 * Reads the [typescript] table, substituting defaults for absent or mistyped fields:
 * `source_roots` → ["."], `test_paths` → ["__tests__"], `tsconfig` → undefined,
 * `frameworks` → []. A value that is not a table is treated as an empty table.
 */
function parseTypeScriptSection(raw: unknown): TypeScriptConfig {
  const table: Record<string, unknown> = isRecord(raw) ? raw : {};
  const { source_roots: sourceRoots, test_paths: testPaths, tsconfig, frameworks } = table;
  return {
    sourceRoots: isStringArray(sourceRoots) ? sourceRoots : ["."],
    testPaths: isStringArray(testPaths) ? testPaths : ["__tests__"],
    tsconfig: typeof tsconfig === "string" ? tsconfig : undefined,
    frameworks: isStringArray(frameworks) ? frameworks : [],
  };
}
|
|
73
|
+
|
|
74
|
+
/**
 * Reads the [lint] table. `strict` is true only when the TOML value is the
 * boolean `true`; `ignore` defaults to [] unless it is an array of strings.
 * A value that is not a table is treated as an empty table.
 */
function parseLintSection(raw: unknown): LintConfig {
  const table: Record<string, unknown> = isRecord(raw) ? raw : {};
  const { strict, ignore } = table;
  return {
    strict: strict === true,
    ignore: isStringArray(ignore) ? ignore : [],
  };
}
|
|
82
|
+
|
|
83
|
+
/** Type guard: true for any non-null, non-array value whose `typeof` is "object". */
function isRecord(value: unknown): value is Record<string, unknown> {
  if (value === null || Array.isArray(value)) {
    return false;
  }
  return typeof value === "object";
}
|
|
87
|
+
|
|
88
|
+
/** Type guard: true for an array in which no element is a non-string (an empty array passes). */
function isStringArray(value: unknown): value is string[] {
  if (!Array.isArray(value)) {
    return false;
  }
  const hasNonString = value.some((item) => typeof item !== "string");
  return !hasNonString;
}
|
|
92
|
+
|
|
93
|
+
export { parseConfig };
|
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
import type { ExtractionResult } from "../types/graph.ts";
|
|
2
|
+
|
|
3
|
+
/**
 * Contract implemented once per supported language: given a file's path and
 * source text, produce the nodes, edges, and tags for the graph.
 */
type Extractor = {
  /** Language name, e.g. "python". */
  readonly language: string;
  /** File extensions this extractor handles, written with the leading dot (e.g. ".py"). */
  readonly fileExtensions: readonly string[];
  /** Extracts graph data from one file's source text. */
  readonly extract: (filePath: string, source: string) => Promise<ExtractionResult>;
};
|
|
12
|
+
|
|
13
|
+
export type { Extractor };
|
|
@@ -0,0 +1,117 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Shared tree-sitter parser initialization.
|
|
3
|
+
* Uses web-tree-sitter with WASM grammars from tree-sitter-wasms.
|
|
4
|
+
*/
|
|
5
|
+
|
|
6
|
+
// web-tree-sitter 0.24.x uses CJS exports
|
|
7
|
+
// biome-ignore lint/suspicious/noExplicitAny: web-tree-sitter 0.24 has no ESM types
|
|
8
|
+
const TreeSitter = require("web-tree-sitter") as any;
|
|
9
|
+
|
|
10
|
+
/** Structural view of a web-tree-sitter parser instance, limited to the members this package calls. */
type TreeSitterParser = {
  setLanguage(lang: TreeSitterLanguage): void;
  parse(input: string): TreeSitterTree;
};

/** Structural view of a loaded grammar. */
type TreeSitterLanguage = {
  readonly version: number;
};

/** Structural view of a parse result. */
type TreeSitterTree = {
  readonly rootNode: TreeSitterNode;
};

/** Structural view of a syntax-tree node: identity, source span, navigation, and field/type lookups. */
type TreeSitterNode = {
  // Identity and text
  readonly type: string;
  readonly text: string;

  // Source span
  readonly startPosition: { row: number; column: number };
  readonly endPosition: { row: number; column: number };
  readonly startIndex: number;
  readonly endIndex: number;

  // Tree navigation
  readonly childCount: number;
  readonly children: readonly TreeSitterNode[];
  readonly parent: TreeSitterNode | null;
  readonly firstChild: TreeSitterNode | null;
  readonly lastChild: TreeSitterNode | null;
  readonly nextSibling: TreeSitterNode | null;
  readonly previousSibling: TreeSitterNode | null;

  // Lookups
  childForFieldName(fieldName: string): TreeSitterNode | null;
  childrenForFieldName(fieldName: string): readonly TreeSitterNode[];
  descendantsOfType(type: string | readonly string[]): readonly TreeSitterNode[];
};
|
|
41
|
+
|
|
42
|
+
// Set to true by initTreeSitter() once TreeSitter.init() has resolved; later calls return early.
let initialized = false;
|
|
43
|
+
// Loaded grammars keyed by language name; filled by loadAndCacheLanguage(), read by createParser().
const languageCache = new Map<string, TreeSitterLanguage>();
|
|
44
|
+
|
|
45
|
+
/**
 * Resolves the WASM file path for a language grammar.
 * Tries multiple locations to support both dev and compiled binary modes:
 * 1. Relative to this source file (dev: bun src/main.ts)
 * 2. Relative to the compiled binary (binary: ./lattice)
 * 3. Relative to CWD (when node_modules exists in working dir)
 *
 * The package root is derived from `import.meta.url` with `fileURLToPath`
 * rather than `URL.pathname`, because `pathname` stays percent-encoded
 * (a space becomes `%20`) and has a leading slash before Windows drive letters.
 *
 * @param language - Grammar name used in the file name `tree-sitter-<language>.wasm`
 * @returns The first candidate path that exists, or the dev path if none does
 */
function grammarPath(language: string): string {
  const { existsSync } = require("node:fs");
  const { dirname, join } = require("node:path");
  const { fileURLToPath } = require("node:url");
  const filename = `tree-sitter-${language}.wasm`;
  const subpath = `node_modules/tree-sitter-wasms/out/${filename}`;

  // Dev mode: two directories up from this source file.
  const packageRootUrl = new URL("../../", import.meta.url);
  // fileURLToPath only accepts file: URLs; keep the old behavior for anything else.
  const packageRoot: string =
    packageRootUrl.protocol === "file:" ? fileURLToPath(packageRootUrl) : packageRootUrl.pathname;
  const devPath: string = join(packageRoot, subpath);

  const candidates: string[] = [
    devPath,
    // Compiled mode: next to the running executable.
    join(dirname(process.execPath), subpath),
    // Current working directory.
    join(process.cwd(), subpath),
  ];

  for (const candidate of candidates) {
    if (existsSync(candidate)) return candidate;
  }

  // Nothing exists: hand back the dev path and leave the failure to whoever loads it.
  return devPath;
}
|
|
75
|
+
|
|
76
|
+
/**
 * Starts the tree-sitter WASM runtime the first time it is called.
 * Once a call has completed, later calls return without doing anything.
 */
async function initTreeSitter(): Promise<void> {
  if (!initialized) {
    await TreeSitter.init();
    initialized = true;
  }
}
|
|
85
|
+
|
|
86
|
+
/**
 * Builds a new parser bound to the grammar for `language`.
 * Initializes tree-sitter if needed and reuses a cached grammar when one is available.
 *
 * @param language - Language name matching the WASM grammar file (e.g., "python", "typescript")
 * @returns A configured parser ready to parse source code
 */
async function createParser(language: string): Promise<TreeSitterParser> {
  await initTreeSitter();

  let grammar = languageCache.get(language);
  if (grammar === undefined || grammar === null) {
    grammar = await loadAndCacheLanguage(language);
  }

  const instance = new TreeSitter();
  instance.setLanguage(grammar);
  return instance as TreeSitterParser;
}
|
|
102
|
+
|
|
103
|
+
/** Loads the grammar WASM located by grammarPath(), stores it in languageCache, and returns it. */
async function loadAndCacheLanguage(language: string): Promise<TreeSitterLanguage> {
  const wasmPath = grammarPath(language);
  const loaded = (await TreeSitter.Language.load(wasmPath)) as TreeSitterLanguage;
  languageCache.set(language, loaded);
  return loaded;
}
|
|
109
|
+
|
|
110
|
+
export {
|
|
111
|
+
createParser,
|
|
112
|
+
initTreeSitter,
|
|
113
|
+
type TreeSitterLanguage,
|
|
114
|
+
type TreeSitterNode,
|
|
115
|
+
type TreeSitterParser,
|
|
116
|
+
type TreeSitterTree,
|
|
117
|
+
};
|
|
@@ -0,0 +1,121 @@
|
|
|
1
|
+
import type { TreeSitterNode, TreeSitterTree } from "../parser.ts";
|
|
2
|
+
|
|
3
|
+
/** A call site found in the AST, before it is resolved into a graph edge. */
type RawCall = {
  /** ID of the enclosing scope, built as `<filePath>::<dot-joined scope names>`. */
  readonly sourceId: string;
  /** Callee expression as written: a bare name or a dotted attribute chain. */
  readonly callee: string;
  /** Line of the call, computed as the node's start row plus one. */
  readonly line: number;
};
|
|
9
|
+
|
|
10
|
+
/**
 * Collects every call expression that appears inside a function or class
 * scope of a Python syntax tree. Calls outside any such scope are not reported.
 *
 * @param tree - Parsed tree-sitter tree
 * @param filePath - Relative file path for source ID construction
 * @returns Raw calls with caller ID and callee expression
 */
function extractPythonCalls(tree: TreeSitterTree, filePath: string): readonly RawCall[] {
  const collected: RawCall[] = [];
  const emptyScope: readonly ScopeEntry[] = [];
  visitForCalls(tree.rootNode, filePath, emptyScope, collected);
  return collected;
}
|
|
23
|
+
|
|
24
|
+
/** One level of enclosing function/class context, identified by the definition's name. */
type ScopeEntry = {
  readonly name: string;
};
|
|
26
|
+
|
|
27
|
+
/**
 * Depth-first walk that records call expressions together with the scope they occur in.
 *
 * - A named function/class definition pushes its name onto the scope for its subtree.
 * - A decorated definition descends only into the wrapped function/class child,
 *   so the decorator expressions themselves are not visited.
 * - A `call` node is recorded only when at least one scope is active.
 */
function visitForCalls(
  node: TreeSitterNode,
  filePath: string,
  scopeStack: readonly ScopeEntry[],
  results: RawCall[],
): void {
  const kind = node.type;

  if (kind === "function_definition" || kind === "class_definition") {
    const identifier = node.childForFieldName("name");
    if (identifier) {
      const innerScope = [...scopeStack, { name: identifier.text }];
      node.children.forEach((child) => {
        visitForCalls(child, filePath, innerScope, results);
      });
      return;
    }
    // Unnamed definition: fall through to the generic traversal below.
  } else if (kind === "decorated_definition") {
    node.children
      .filter(
        (child) => child.type === "function_definition" || child.type === "class_definition",
      )
      .forEach((definition) => {
        visitForCalls(definition, filePath, scopeStack, results);
      });
    return;
  }

  if (kind === "call" && scopeStack.length > 0) {
    const callee = extractCalleeName(node);
    if (callee) {
      const scopePath = scopeStack.map((entry) => entry.name).join(".");
      results.push({
        sourceId: `${filePath}::${scopePath}`,
        callee,
        line: node.startPosition.row + 1,
      });
    }
  }

  node.children.forEach((child) => {
    visitForCalls(child, filePath, scopeStack, results);
  });
}
|
|
77
|
+
|
|
78
|
+
/**
 * Names the target of a call node from its first child.
 * An `identifier` yields its text, an `attribute` yields the flattened dotted
 * chain, and any other callee shape yields undefined.
 */
function extractCalleeName(callNode: TreeSitterNode): string | undefined {
  const target = callNode.children[0];
  if (!target) {
    return undefined;
  }

  switch (target.type) {
    case "identifier":
      return target.text;
    case "attribute":
      return flattenAttribute(target);
    default:
      return undefined;
  }
}
|
|
96
|
+
|
|
97
|
+
/**
 * Converts a nested attribute access into a dotted name.
 * e.g., attribute(attribute(identifier("stripe"), "charges"), "create") → "stripe.charges.create"
 *
 * Walks from the outermost attribute inward through each node's first child,
 * collecting the trailing identifier of every level. The innermost node
 * contributes only if it is an identifier.
 */
function flattenAttribute(node: TreeSitterNode): string {
  // Segments are gathered right-to-left and reversed at the end.
  const reversed: string[] = [];
  let cursor: TreeSitterNode | null = node;

  while (cursor !== null && cursor.type === "attribute") {
    const trailing = cursor.children.at(-1);
    if (trailing !== undefined && trailing.type === "identifier") {
      reversed.push(trailing.text);
    }
    cursor = cursor.children[0] ?? null;
  }

  if (cursor !== null && cursor.type === "identifier") {
    reversed.push(cursor.text);
  }

  return reversed.reverse().join(".");
}
|
|
120
|
+
|
|
121
|
+
export { extractPythonCalls, type RawCall };
|
|
@@ -0,0 +1,171 @@
|
|
|
1
|
+
import type { Edge, ExtractionResult, Tag } from "../../types/graph.ts";
|
|
2
|
+
import { isOk, unwrap } from "../../types/result.ts";
|
|
3
|
+
import type { Extractor } from "../extractor.ts";
|
|
4
|
+
import { createParser, type TreeSitterParser } from "../parser.ts";
|
|
5
|
+
import { parseTags } from "../tags.ts";
|
|
6
|
+
import { extractPythonCalls } from "./calls.ts";
|
|
7
|
+
import { detectPythonFrameworks } from "./frameworks.ts";
|
|
8
|
+
import { extractPythonSymbols } from "./symbols.ts";
|
|
9
|
+
|
|
10
|
+
/**
 * Builds the Python extractor around a single tree-sitter parser instance.
 * The parser is created via createParser("python") and shared by every
 * `extract` call on the returned extractor.
 *
 * @returns An Extractor configured for Python source files
 */
async function createPythonExtractor(): Promise<Extractor> {
  const pythonParser = await createParser("python");

  const extract = (filePath: string, source: string): Promise<ExtractionResult> =>
    extractPython(pythonParser, filePath, source);

  return {
    language: "python",
    fileExtensions: [".py"],
    extract,
  };
}
|
|
26
|
+
|
|
27
|
+
/**
 * Runs the full Python extraction pipeline on one file: symbols, call edges,
 * lattice tags, and framework route metadata.
 *
 * Whitespace-only sources produce an empty result without parsing. Calls that
 * resolve to a symbol in the same file become "certain" edges; all others
 * become "uncertain" edges whose target is the raw callee expression.
 *
 * @param parser - Initialized tree-sitter parser for Python
 * @param filePath - Relative file path for node ID construction
 * @param source - Raw source code
 * @returns Complete extraction result with nodes, edges, tags, and unresolved references
 */
async function extractPython(
  parser: TreeSitterParser,
  filePath: string,
  source: string,
): Promise<ExtractionResult> {
  if (source.trim().length === 0) {
    return { nodes: [], edges: [], tags: [], unresolved: [] };
  }

  const tree = parser.parse(source);

  // Symbols: functions, classes, methods.
  const nodes = [...extractPythonSymbols(tree, filePath, source)];
  const knownIds = new Set(nodes.map((symbol) => symbol.id));

  // Calls → edges. Unresolved callees keep their raw expression as the target.
  const edges: Edge[] = extractPythonCalls(tree, filePath).map((call): Edge => {
    const resolved = resolveCalleeInFile(call.callee, filePath, knownIds);
    return resolved
      ? { sourceId: call.sourceId, targetId: resolved, kind: "calls", certainty: "certain" }
      : { sourceId: call.sourceId, targetId: call.callee, kind: "calls", certainty: "uncertain" };
  });

  // Lattice tags from the comment block above each symbol.
  const tags = extractTagsFromSource(source, filePath, nodes);

  // Framework detections: attach the route to the first node with a matching name.
  for (const detection of detectPythonFrameworks(tree, filePath)) {
    if (!detection.route) continue;

    const position = nodes.findIndex((symbol) => symbol.name === detection.functionName);
    const matched = position === -1 ? undefined : nodes[position];
    if (!matched) continue;

    nodes[position] = {
      ...matched,
      metadata: { ...matched.metadata, route: detection.route },
    };
  }

  return { nodes, edges, tags, unresolved: [] };
}
|
|
97
|
+
|
|
98
|
+
/**
 * Maps a callee expression to the ID of a node defined in the same file.
 *
 * Tries `<filePath>::<callee>` first. For `self.<name>` callees it then returns
 * the first known ID in this file that ends with `.<name>`, in the set's
 * iteration order. Returns undefined when nothing matches.
 */
function resolveCalleeInFile(
  callee: string,
  filePath: string,
  nodeIds: Set<string>,
): string | undefined {
  const filePrefix = `${filePath}::`;

  const exactId = filePrefix + callee;
  if (nodeIds.has(exactId)) {
    return exactId;
  }

  const selfPrefix = "self.";
  if (!callee.startsWith(selfPrefix)) {
    return undefined;
  }

  const methodSuffix = `.${callee.slice(selfPrefix.length)}`;
  return [...nodeIds].find(
    (candidate) => candidate.endsWith(methodSuffix) && candidate.startsWith(filePrefix),
  );
}
|
|
123
|
+
|
|
124
|
+
/**
 * Finds lattice tags in the comment block immediately above each node and
 * attaches them to that node's ID.
 *
 * Scans upward from the line before `lineStart`: comment lines (`#`, `//`, `/*`)
 * are collected, lines starting with `@` are skipped, and a blank line or any
 * other line ends the scan. Blocks that parseTags rejects contribute no tags.
 */
function extractTagsFromSource(
  source: string,
  _filePath: string,
  nodes: readonly { readonly id: string; readonly lineStart: number }[],
): readonly Tag[] {
  const sourceLines = source.split("\n");
  const collectedTags: Tag[] = [];

  for (const node of nodes) {
    const commentBlock: string[] = [];

    // lineStart is 1-based and the array is 0-based, so the line above is lineStart - 2.
    for (let row = node.lineStart - 2; row >= 0; row--) {
      const text = sourceLines[row]?.trim();
      if (!text) break;

      const isComment = text.startsWith("#") || text.startsWith("//") || text.startsWith("/*");
      if (isComment) {
        commentBlock.unshift(text);
        continue;
      }

      // Decorators sit between the tags and the definition; step over them.
      if (!text.startsWith("@")) break;
    }

    if (commentBlock.length === 0) continue;

    const parsedBlock = parseTags(commentBlock.join("\n"));
    if (!isOk(parsedBlock)) continue;

    for (const { kind, value } of unwrap(parsedBlock)) {
      collectedTags.push({ nodeId: node.id, kind, value });
    }
  }

  return collectedTags;
}
|
|
170
|
+
|
|
171
|
+
export { createPythonExtractor };
|