@monoes/graph 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (82) hide show
  1. package/dist/src/analyze.d.ts +23 -0
  2. package/dist/src/analyze.d.ts.map +1 -0
  3. package/dist/src/analyze.js +105 -0
  4. package/dist/src/analyze.js.map +1 -0
  5. package/dist/src/build.d.ts +8 -0
  6. package/dist/src/build.d.ts.map +1 -0
  7. package/dist/src/build.js +59 -0
  8. package/dist/src/build.js.map +1 -0
  9. package/dist/src/cache.d.ts +10 -0
  10. package/dist/src/cache.d.ts.map +1 -0
  11. package/dist/src/cache.js +34 -0
  12. package/dist/src/cache.js.map +1 -0
  13. package/dist/src/cluster.d.ts +8 -0
  14. package/dist/src/cluster.d.ts.map +1 -0
  15. package/dist/src/cluster.js +50 -0
  16. package/dist/src/cluster.js.map +1 -0
  17. package/dist/src/detect.d.ts +8 -0
  18. package/dist/src/detect.d.ts.map +1 -0
  19. package/dist/src/detect.js +108 -0
  20. package/dist/src/detect.js.map +1 -0
  21. package/dist/src/export.d.ts +21 -0
  22. package/dist/src/export.d.ts.map +1 -0
  23. package/dist/src/export.js +68 -0
  24. package/dist/src/export.js.map +1 -0
  25. package/dist/src/extract/index.d.ts +20 -0
  26. package/dist/src/extract/index.d.ts.map +1 -0
  27. package/dist/src/extract/index.js +158 -0
  28. package/dist/src/extract/index.js.map +1 -0
  29. package/dist/src/extract/languages/go.d.ts +3 -0
  30. package/dist/src/extract/languages/go.d.ts.map +1 -0
  31. package/dist/src/extract/languages/go.js +181 -0
  32. package/dist/src/extract/languages/go.js.map +1 -0
  33. package/dist/src/extract/languages/python.d.ts +3 -0
  34. package/dist/src/extract/languages/python.d.ts.map +1 -0
  35. package/dist/src/extract/languages/python.js +230 -0
  36. package/dist/src/extract/languages/python.js.map +1 -0
  37. package/dist/src/extract/languages/rust.d.ts +3 -0
  38. package/dist/src/extract/languages/rust.d.ts.map +1 -0
  39. package/dist/src/extract/languages/rust.js +195 -0
  40. package/dist/src/extract/languages/rust.js.map +1 -0
  41. package/dist/src/extract/languages/typescript.d.ts +3 -0
  42. package/dist/src/extract/languages/typescript.d.ts.map +1 -0
  43. package/dist/src/extract/languages/typescript.js +295 -0
  44. package/dist/src/extract/languages/typescript.js.map +1 -0
  45. package/dist/src/extract/tree-sitter-runner.d.ts +48 -0
  46. package/dist/src/extract/tree-sitter-runner.d.ts.map +1 -0
  47. package/dist/src/extract/tree-sitter-runner.js +128 -0
  48. package/dist/src/extract/tree-sitter-runner.js.map +1 -0
  49. package/dist/src/extract/types.d.ts +7 -0
  50. package/dist/src/extract/types.d.ts.map +1 -0
  51. package/dist/src/extract/types.js +2 -0
  52. package/dist/src/extract/types.js.map +1 -0
  53. package/dist/src/index.d.ts +11 -0
  54. package/dist/src/index.d.ts.map +1 -0
  55. package/dist/src/index.js +9 -0
  56. package/dist/src/index.js.map +1 -0
  57. package/dist/src/pipeline.d.ts +16 -0
  58. package/dist/src/pipeline.d.ts.map +1 -0
  59. package/dist/src/pipeline.js +143 -0
  60. package/dist/src/pipeline.js.map +1 -0
  61. package/dist/src/types.d.ts +99 -0
  62. package/dist/src/types.d.ts.map +1 -0
  63. package/dist/src/types.js +2 -0
  64. package/dist/src/types.js.map +1 -0
  65. package/dist/tsconfig.tsbuildinfo +1 -0
  66. package/package.json +44 -0
  67. package/src/analyze.ts +122 -0
  68. package/src/build.ts +62 -0
  69. package/src/cache.ts +38 -0
  70. package/src/cluster.ts +54 -0
  71. package/src/detect.ts +123 -0
  72. package/src/export.ts +78 -0
  73. package/src/extract/index.ts +190 -0
  74. package/src/extract/languages/go.ts +206 -0
  75. package/src/extract/languages/python.ts +270 -0
  76. package/src/extract/languages/rust.ts +230 -0
  77. package/src/extract/languages/typescript.ts +344 -0
  78. package/src/extract/tree-sitter-runner.ts +165 -0
  79. package/src/extract/types.ts +7 -0
  80. package/src/index.ts +10 -0
  81. package/src/pipeline.ts +166 -0
  82. package/src/types.ts +116 -0
@@ -0,0 +1,165 @@
1
+ import { readFileSync } from 'fs';
2
+ import type { ExtractionResult } from '../types.js';
3
+ import type { LanguageExtractor } from './types.js';
4
+
5
// Structural stand-in for the parser class exported by the optional
// tree-sitter binding. The real shape only exists at runtime, so the grammar
// is typed as `unknown`; that keeps this module compiling when the optional
// dependency is not installed.
interface ParserInstance {
  /** Attaches a grammar object to this parser instance. */
  setLanguage(lang: unknown): void;
  /** Parses source text; the returned tree exposes its root syntax node. */
  parse(src: string): { rootNode: SyntaxNodeLike };
}
11
+
12
// Row/column pair used for both ends of a node's span.
type SyntaxPosition = { row: number; column: number };

/**
 * Minimal structural view of a syntax-tree node, limited to the members this
 * package declares a need for. Declared locally so no type import from the
 * optional tree-sitter binding is required.
 */
export type SyntaxNodeLike = {
  /** Node kind identifier. */
  type: string;
  /** Text associated with the node. */
  text: string;
  /** Position where the node begins. */
  startPosition: SyntaxPosition;
  /** Position where the node ends. */
  endPosition: SyntaxPosition;
  /** Child nodes, in order. */
  children: SyntaxNodeLike[];
  /** Looks up a child by field name; null when there is none. */
  childForFieldName(name: string): SyntaxNodeLike | null;
  /** Collects descendants whose kind matches one of the given types. */
  descendantsOfType(type: string | string[]): SyntaxNodeLike[];
};
21
+
22
+ // ---- availability probe ----
23
+
24
// Tri-state probe result: null = not probed yet, true/false = cached outcome.
let _treeSitterAvailable: boolean | null = null;
// Parser constructor captured by a successful probe; stays null otherwise.
let _ParserCtor: (new () => ParserInstance) | null = null;

// Module names tried in order. The originally referenced name is kept first
// for backward compatibility; `tree-sitter` is the name under which the Node
// binding is published on npm.
const TREE_SITTER_MODULE_CANDIDATES: readonly string[] = [
  'node-tree-sitter',
  'tree-sitter',
];

/**
 * Probes once for the optional tree-sitter binding and caches the outcome in
 * `_treeSitterAvailable` / `_ParserCtor`. Later calls return immediately.
 *
 * A candidate counts as usable when the loaded module, or its `default`
 * export when one is defined, is a function. Load failures are swallowed on
 * purpose: a missing optional dependency must not break callers.
 *
 * NOTE(review): `require` only exists under CommonJS output. If this package
 * is emitted as ES modules the call throws, is caught below, and the probe
 * always reports "unavailable" — confirm the build's module setting.
 */
function probeTreeSitter(): void {
  if (_treeSitterAvailable !== null) return;

  for (const moduleName of TREE_SITTER_MODULE_CANDIDATES) {
    try {
      // eslint-disable-next-line @typescript-eslint/no-var-requires
      const loaded = require(moduleName) as { default?: unknown };
      const ctor = loaded.default !== undefined ? loaded.default : loaded;
      if (typeof ctor === 'function') {
        _ParserCtor = ctor as new () => ParserInstance;
        _treeSitterAvailable = true;
        return;
      }
    } catch {
      // Not installed or failed to load: try the next candidate.
    }
  }

  _treeSitterAvailable = false;
}
47
+
48
/**
 * Reports whether the optional tree-sitter binding could be loaded.
 * The probe runs on first use; subsequent calls reuse its cached outcome.
 */
export function isTreeSitterAvailable(): boolean {
  probeTreeSitter();
  const available = _treeSitterAvailable ?? false;
  return available;
}
55
+
56
+ // ---- language grammar loader ----
57
+
58
// Grammar package that provides each supported language identifier.
// `typescript`/`tsx` share one package, as do `javascript`/`jsx`.
const LANGUAGE_MODULE_MAP: Record<string, string> = {
  typescript: 'tree-sitter-typescript',
  tsx: 'tree-sitter-typescript',
  javascript: 'tree-sitter-javascript',
  jsx: 'tree-sitter-javascript',
  python: 'tree-sitter-python',
  go: 'tree-sitter-go',
  rust: 'tree-sitter-rust',
};

/**
 * Builds a parser configured for `language`.
 *
 * @param language - Language identifier; must be a key of LANGUAGE_MODULE_MAP.
 * @returns A new parser with the grammar attached, or null when the
 *   tree-sitter binding is unavailable, the language is unknown, or loading /
 *   attaching the grammar throws.
 */
export function tryLoadParser(language: string): ParserInstance | null {
  const Parser = isTreeSitterAvailable() ? _ParserCtor : null;
  if (Parser === null) return null;

  const grammarPackage = LANGUAGE_MODULE_MAP[language];
  if (!grammarPackage) return null;

  try {
    // eslint-disable-next-line @typescript-eslint/no-var-requires
    const grammarExports = require(grammarPackage) as Record<string, unknown>;
    const wholeModule = grammarExports['default'] ?? grammarExports;

    // tree-sitter-typescript exposes { typescript, tsx } sub-grammars, so for
    // those two ids the matching sub-grammar is preferred over the module.
    const hasSubGrammars = language === 'typescript' || language === 'tsx';
    const grammar: unknown = hasSubGrammars
      ? grammarExports[language] ?? wholeModule
      : wholeModule;

    const parser = new Parser();
    parser.setLanguage(grammar);
    return parser;
  } catch {
    // Grammar not installed or rejected by the parser: signal "no parser".
    return null;
  }
}
100
+
101
+ // ---- depth-first AST walker ----
102
+
103
/**
 * Visits `node` and all of its descendants in depth-first pre-order: a node
 * is visited before its children, and children are visited left to right.
 *
 * The traversal keeps its own stack instead of recursing, so very deep trees
 * (for example long chains of nested expressions) cannot exhaust the call
 * stack.
 *
 * @param node - Root of the subtree to traverse.
 * @param visitor - Called exactly once for every node reached.
 */
export function walk(
  node: SyntaxNodeLike,
  visitor: (n: SyntaxNodeLike) => void,
): void {
  const pending: SyntaxNodeLike[] = [node];
  let current = pending.pop();

  while (current !== undefined) {
    visitor(current);

    // Read children only after the visit, then push them right-to-left so
    // the leftmost child is the next one popped.
    const kids = current.children;
    for (let i = kids.length - 1; i >= 0; i -= 1) {
      pending.push(kids[i]);
    }

    current = pending.pop();
  }
}
112
+
113
+ // ---- main entry point ----
114
+
115
/**
 * Runs `extractor.extract(filePath, content)` and returns what it produces.
 *
 * No parsing happens here: the extractor receives the path and raw content
 * and is responsible for any AST traversal or fallback strategy of its own.
 * If the extractor throws, the exception is converted into a result with no
 * nodes or edges that counts the file as processed and carries a single
 * error message.
 *
 * @param filePath - Path reported to the extractor and used in error text.
 * @param content - Raw file content handed to the extractor.
 * @param extractor - Language extractor to delegate to.
 */
export function parseFile(
  filePath: string,
  content: string,
  extractor: LanguageExtractor,
): ExtractionResult {
  let outcome: ExtractionResult;

  try {
    outcome = extractor.extract(filePath, content);
  } catch (err) {
    const reason = err instanceof Error ? err.message : String(err);
    outcome = {
      nodes: [],
      edges: [],
      filesProcessed: 1,
      fromCache: 0,
      errors: [`parseFile error for ${filePath}: ${reason}`],
    };
  }

  return outcome;
}
142
+
143
/**
 * Reads `filePath` as UTF-8 text and passes the content to `parseFile`.
 *
 * A read failure does not throw: it yields a result with no nodes or edges
 * that counts the file as processed and carries one "Failed to read" message.
 *
 * @param filePath - File to read from disk.
 * @param extractor - Language extractor forwarded to `parseFile`.
 */
export function parseFileFromDisk(
  filePath: string,
  extractor: LanguageExtractor,
): ExtractionResult {
  let text: string;

  try {
    text = readFileSync(filePath, 'utf8');
  } catch (err) {
    const detail = err instanceof Error ? err.message : String(err);
    const unreadable: ExtractionResult = {
      nodes: [],
      edges: [],
      filesProcessed: 1,
      fromCache: 0,
      errors: [`Failed to read ${filePath}: ${detail}`],
    };
    return unreadable;
  }

  return parseFile(filePath, text, extractor);
}
@@ -0,0 +1,7 @@
1
+ import type { ExtractionResult } from '../types.js';
2
+
3
/**
 * Contract implemented by every per-language extractor.
 */
export interface LanguageExtractor {
  /** Language identifier this extractor is declared for. */
  language: string;
  /** File extensions associated with the language. */
  extensions: string[];
  /**
   * Produces the extraction result for one file, given its path and raw
   * text content.
   */
  extract(filePath: string, content: string): ExtractionResult;
}
package/src/index.ts ADDED
@@ -0,0 +1,10 @@
1
+ export { buildGraph } from './pipeline.js';
2
+ export { FileCache } from './cache.js';
3
+ export { collectFiles } from './detect.js';
4
+ export { buildGraph as buildGraphologyGraph } from './build.js';
5
+ export { detectCommunities } from './cluster.js';
6
+ export { buildAnalysis, godNodes, surprisingConnections, graphStats } from './analyze.js';
7
+ export { saveGraph, loadGraph, graphExists, getGraphPath } from './export.js';
8
+ export { isTreeSitterAvailable, tryLoadParser, parseFile, parseFileFromDisk } from './extract/tree-sitter-runner.js';
9
+ export type { LanguageExtractor } from './extract/types.js';
10
+ export type * from './types.js';
@@ -0,0 +1,166 @@
1
+ import { join } from 'path';
2
+ import { mkdirSync, readFileSync } from 'fs';
3
+ import type {
4
+ BuildOptions,
5
+ ExtractionResult,
6
+ GraphAnalysis,
7
+ SerializedGraph,
8
+ } from './types.js';
9
+ import { collectFiles } from './detect.js';
10
+ import { FileCache } from './cache.js';
11
+ import { buildGraph as buildGraphologyGraph } from './build.js';
12
+ import { detectCommunities } from './cluster.js';
13
+ import { buildAnalysis } from './analyze.js';
14
+ import { saveGraph } from './export.js';
15
+ import { typescriptExtractor } from './extract/languages/typescript.js';
16
+ import { parseFile } from './extract/tree-sitter-runner.js';
17
+ import type { LanguageExtractor } from './extract/types.js';
18
+
19
// Output location, relative to the project root, used when the caller does
// not supply `outputDir`.
const DEFAULT_OUTPUT_SUBDIR = '.monobrain/graph';

// Extractors keyed by language identifier. Seeded with the statically
// imported TypeScript extractor; extractors for other languages are added by
// tryLoadExtractor() the first time their module loads successfully.
const EXTRACTOR_MAP: Record<string, LanguageExtractor> = {
  typescript: typescriptExtractor,
  javascript: typescriptExtractor, // TS extractor handles JS via regex + tree-sitter-javascript
};

// Language identifiers for which no extractor could be loaded. Remembering
// them avoids repeating a failing dynamic import for every file of that
// language.
const UNAVAILABLE_EXTRACTORS = new Set<string>();

/**
 * Resolves the extractor for `language`, loading
 * `./extract/languages/<language>.js` on demand when it is not registered yet.
 *
 * The module's `<language>Extractor` export is preferred, then its default
 * export. Successful loads are stored in EXTRACTOR_MAP; failed ones are
 * remembered so they are attempted only once.
 *
 * @param language - Language identifier, also used as the module file name.
 * @returns The extractor, or undefined when none is available.
 */
async function tryLoadExtractor(language: string): Promise<LanguageExtractor | undefined> {
  // Own-property check: a plain-object lookup would also return inherited
  // members (e.g. `constructor`) for unlucky language names.
  if (Object.prototype.hasOwnProperty.call(EXTRACTOR_MAP, language)) {
    return EXTRACTOR_MAP[language];
  }
  if (UNAVAILABLE_EXTRACTORS.has(language)) return undefined;

  try {
    const mod = (await import(`./extract/languages/${language}.js`)) as Record<
      string,
      LanguageExtractor | undefined
    >;
    const extractor = mod[`${language}Extractor`] ?? mod['default'];
    if (extractor) {
      EXTRACTOR_MAP[language] = extractor;
    } else {
      UNAVAILABLE_EXTRACTORS.add(language);
    }
    return extractor;
  } catch {
    // Module missing or failed to evaluate: callers fall back to the generic
    // extraction path.
    UNAVAILABLE_EXTRACTORS.add(language);
    return undefined;
  }
}
40
+
41
/**
 * Builds a knowledge graph for the codebase rooted at `projectPath`.
 *
 * Steps, in order: collect files, extract nodes/edges per file (reusing
 * cached results keyed on path and content), build the graph, run community
 * detection, annotate each node with its degree, compute the analysis, save
 * the graph to disk, and finally serialise it for the caller.
 *
 * Unreadable files are recorded in the merged error list and skipped.
 *
 * @param projectPath - Absolute path to the root of the codebase to analyse.
 * @param options     - Optional build configuration.
 * @returns           - Serialized graph + analysis summary.
 */
export async function buildGraph(
  projectPath: string,
  options: BuildOptions = {},
): Promise<{ graph: SerializedGraph; analysis: GraphAnalysis }> {
  // The output directory must exist before the cache is pointed at it.
  const outputDir = options.outputDir ?? join(projectPath, DEFAULT_OUTPUT_SUBDIR);
  mkdirSync(outputDir, { recursive: true });
  const cache = new FileCache(outputDir);

  // 1. Collect files
  const files = collectFiles(projectPath, options);

  // 2. Extract per file and fold everything into one accumulated result.
  //    The hyperedge list is kept in a local so it is never optional here.
  const hyperedges: NonNullable<ExtractionResult['hyperedges']> = [];
  const merged: ExtractionResult = {
    nodes: [],
    edges: [],
    hyperedges,
    filesProcessed: 0,
    fromCache: 0,
    errors: [],
  };

  for (const file of files) {
    let content: string;
    try {
      content = readFileSync(file.path, 'utf-8');
    } catch (err) {
      merged.errors.push(`Cannot read ${file.path}: ${String(err)}`);
      continue;
    }

    const cacheKey = cache.key(file.path, content);
    let result = cache.get(cacheKey);

    if (!result) {
      // Cache miss: use the language extractor when one can be loaded,
      // otherwise fall back to the generic single-node extraction.
      const extractor = file.language
        ? await tryLoadExtractor(file.language)
        : undefined;
      result = extractor
        ? parseFile(file.path, content, extractor)
        : extractGeneric(file.path, content);
      cache.set(cacheKey, result);
    } else {
      merged.fromCache += 1;
    }

    merged.nodes.push(...result.nodes);
    merged.edges.push(...result.edges);
    if (result.hyperedges) hyperedges.push(...result.hyperedges);
    merged.filesProcessed += 1;
    merged.errors.push(...result.errors);
  }

  // 3. Build the graph from the merged extraction result
  const graph = buildGraphologyGraph(merged);

  // 4. Community detection
  await detectCommunities(graph);

  // 5. Degree annotation
  for (const id of graph.nodes()) {
    graph.setNodeAttribute(id, 'degree', graph.degree(id));
  }

  // 6. Build analysis
  const analysis = buildAnalysis(graph, outputDir);

  // 7. Persist to disk
  saveGraph(graph, outputDir, projectPath);

  // 8. Serialize to the public return type. The timestamp is taken before
  //    nodes and links are mapped.
  const builtAt = new Date().toISOString();
  const nodes = graph.nodes().map((id) => ({
    id,
    ...graph.getNodeAttributes(id),
  }));
  const links = graph.edges().map((edgeId) => ({
    source: graph.source(edgeId),
    target: graph.target(edgeId),
    ...graph.getEdgeAttributes(edgeId),
  }));
  const serialized: SerializedGraph = {
    version: '1.0.0',
    builtAt,
    projectPath,
    directed: true,
    multigraph: false,
    nodes,
    links,
  };

  return { graph: serialized, analysis };
}
145
+
146
+ // ---------------------------------------------------------------------------
147
+ // Internal: minimal fallback for languages without a dedicated extractor
148
+ // ---------------------------------------------------------------------------
149
+
150
/**
 * Fallback extraction for files without a dedicated extractor: yields a
 * single node that represents the file itself and no edges.
 *
 * The node label is the last path segment. Both `/` and `\` are accepted as
 * separators so Windows-style paths are labelled by file name as well; when
 * the last segment is empty (path ends in a separator) the full path is used.
 *
 * @param filePath - Path of the file; used as node id and source file.
 * @param content - Raw file content; only its line count is recorded.
 */
function extractGeneric(filePath: string, content: string): ExtractionResult {
  const segments = filePath.split(/[\\/]/);
  const baseName = segments[segments.length - 1];

  return {
    nodes: [
      {
        id: filePath,
        label: baseName || filePath,
        fileType: 'code',
        sourceFile: filePath,
        // Number of newline-separated segments, so empty content counts as 1.
        linesOfCode: content.split('\n').length,
      },
    ],
    edges: [],
    filesProcessed: 1,
    fromCache: 0,
    errors: [],
  };
}
package/src/types.ts ADDED
@@ -0,0 +1,116 @@
1
/**
 * Confidence level attached to graph edges (and hyperedges).
 * Per the original note, the levels match graphify's audit trail.
 */
export type Confidence =
  | 'EXTRACTED'
  | 'INFERRED'
  | 'AMBIGUOUS';
3
+
4
/** Category of file a graph node originates from. */
export type FileType =
  | 'code'
  | 'document'
  | 'paper'
  | 'image'
  | 'unknown';
6
+
7
/**
 * Node in the knowledge graph.
 *
 * Beyond the declared members, arbitrary extra attributes are allowed for
 * specific node types.
 */
export interface GraphNode {
  /** Node identifier. */
  id: string;
  /** Display label. */
  label: string;
  /** Category of the originating file. */
  fileType: FileType;
  /** File the node was extracted from. */
  sourceFile: string;
  /** Location inside the source file, e.g. "L42". */
  sourceLocation?: string;
  /** Community number, when one has been assigned. */
  community?: number;
  /** Node degree, when it has been computed. */
  degree?: number;
  /** Extra attributes for specific node types. */
  [key: string]: unknown;
}
19
+
20
/** Edge in the knowledge graph. */
export interface GraphEdge {
  /** Identifier of the node the edge starts at. */
  source: string;
  /** Identifier of the node the edge points to. */
  target: string;
  /** Relation name: 'calls', 'imports', 'uses', 'contains', 'implements', etc. */
  relation: string;
  /** Confidence level of the edge. */
  confidence: Confidence;
  /** Score in the range 0.0-1.0, intended for INFERRED edges. */
  confidenceScore?: number;
  /** File the edge was derived from, when known. */
  sourceFile?: string;
  /** Location inside the source file, when known. */
  sourceLocation?: string;
  /** Optional edge weight. */
  weight?: number;
}
31
+
32
/** Raw extraction result from a single file or a set of files. */
export interface ExtractionResult {
  /** Nodes found. */
  nodes: GraphNode[];
  /** Edges found. */
  edges: GraphEdge[];
  /** Group relationships found, if any. */
  hyperedges?: HyperEdge[];
  /** Number of files this result accounts for. */
  filesProcessed: number;
  /** Number of those files whose result came from the cache. */
  fromCache: number;
  /** Human-readable error messages collected along the way. */
  errors: string[];
}
41
+
42
/** Hyperedge: a labelled relationship over a group of nodes. */
export interface HyperEdge {
  /** Label of the group relationship. */
  label: string;
  /** Members of the group (presumably node ids — confirm against producers). */
  nodes: string[];
  /** Confidence level of the relationship. */
  confidence: Confidence;
  /** Optional numeric confidence score. */
  confidenceScore?: number;
}
49
+
50
/** Graph analysis result. */
export interface GraphAnalysis {
  /** Entries reported as "god nodes". */
  godNodes: GodNode[];
  /** Edges reported as surprising connections. */
  surprises: SurpriseEdge[];
  /** Community number mapped to its members (presumably node ids — confirm). */
  communities: Record<number, string[]>;
  /** Aggregate statistics for the graph. */
  stats: GraphStats;
}
57
+
58
/** Entry of the "god nodes" list in a graph analysis. */
export interface GodNode {
  /** Node identifier. */
  id: string;
  /** Display label of the node. */
  label: string;
  /** Degree of the node. */
  degree: number;
  /** Community number, when one has been assigned. */
  community?: number;
  /** File the node originates from. */
  sourceFile: string;
  /** Neighbouring nodes (presumably ids — confirm against analyze.ts). */
  neighbors: string[];
}
66
+
67
/** Entry of the "surprises" list in a graph analysis. */
export interface SurpriseEdge {
  /** Node at the start of the edge. */
  from: string;
  /** Community number of the `from` node. */
  fromCommunity: number;
  /** File of the `from` node. */
  fromFile: string;
  /** Node at the end of the edge. */
  to: string;
  /** Community number of the `to` node. */
  toCommunity: number;
  /** File of the `to` node. */
  toFile: string;
  /** Relation name of the edge. */
  relation: string;
  /** Confidence level of the edge. */
  confidence: Confidence;
  /** Score assigned to this entry. */
  score: number;
}
78
+
79
/** Aggregate statistics reported as part of a graph analysis. */
export interface GraphStats {
  /** Node count. */
  nodes: number;
  /** Edge count. */
  edges: number;
  /** Community count. */
  communities: number;
  /** One number per confidence level. */
  confidence: Record<Confidence, number>;
  /** Numbers keyed by file type. */
  fileTypes: Record<string, number>;
  /** Numbers keyed by relation name. */
  topRelations: Record<string, number>;
  /** Whether the graph is directed. */
  isDirected: boolean;
  /** Path of the graph on disk, when known. */
  graphPath?: string;
}
89
+
90
/** Serialized graph format (JSON-compatible). */
export interface SerializedGraph {
  /** Version string of the serialized graph. */
  version: string;
  /** Build timestamp as a string. */
  builtAt: string;
  /** Root path of the analysed project. */
  projectPath: string;
  /** Nodes: an `id` plus arbitrary attributes. */
  nodes: Array<{ id: string } & Record<string, unknown>>;
  /** Links: `source` and `target` plus arbitrary attributes. */
  links: Array<{ source: string; target: string } & Record<string, unknown>>;
  /** Whether the graph is directed. */
  directed: boolean;
  /** Whether the graph is a multigraph. */
  multigraph: boolean;
}
100
+
101
/** Build options. Every member is optional. */
export interface BuildOptions {
  /** Code-only switch (presumably restricts collection to code files — confirm against detect.ts). */
  codeOnly?: boolean;
  /** Output directory; defaults to <projectPath>/.monobrain/graph/ */
  outputDir?: string;
  /** File size limit in bytes; documented default is 500KB. */
  maxFileSizeBytes?: number;
  /** Patterns to exclude. */
  excludePatterns?: string[];
  /** Language identifiers (presumably a filter on collected files — confirm). */
  languages?: string[];
}
109
+
110
/** File classification record. */
export interface ClassifiedFile {
  /** Path of the file. */
  path: string;
  /** Category assigned to the file. */
  fileType: FileType;
  /** Language identifier, when one was determined. */
  language?: string;
  /** File size in bytes. */
  sizeBytes: number;
}