@mishasinitcyn/betterrank 0.1.7 → 0.1.9

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/src/index.js CHANGED
@@ -1,8 +1,158 @@
1
1
  import { readFile } from 'fs/promises';
2
- import { join, dirname, relative, sep } from 'path';
2
+ import { join, dirname, relative, sep, basename } from 'path';
3
3
  import { CodeIndexCache } from './cache.js';
4
4
  import { rankedSymbols } from './graph.js';
5
5
 
6
+ // ── Orphan false-positive filters ──────────────────────────────────────────
7
+ //
8
+ // Orphan detection finds files/symbols with no cross-file connections.
9
+ // Many of these are false positives: entry points, config, tests, framework
10
+ // hooks, etc. that are invoked by runtimes, not by other source files.
11
+ // These filters aggressively exclude them (at the cost of some true positives).
12
+
13
// Lower-cased file stems (basename without its final extension) that are
// treated as runtime entry points, configuration, or package markers. They
// have no incoming IMPORTS because a runtime or tool loads them directly,
// not because they are dead code.
const ORPHAN_EXCLUDED_BASENAMES = new Set([
  // Program / library entry points
  'index',
  'main',
  'app',
  'server',
  'cli',
  'mod',
  'lib',
  // Web / serverless entry points
  'manage',
  'wsgi',
  'asgi',
  'handler',
  'lambda',
  // Python package markers
  '__init__',
  '__main__',
  // Configuration and setup
  'config',
  'settings',
  'conf',
  'conftest',
  'setup',
  // Task-runner definitions
  'gulpfile',
  'gruntfile',
  'makefile',
  'rakefile',
  'taskfile',
]);
23
+
24
// Lower-case directory names that mark a path as test/spec code. Each entry is
// wrapped in '/' so that it only matches a complete path segment.
const TEST_PATH_SEGMENTS = [
  '/test/', '/tests/', '/__tests__/', '/spec/', '/specs/',
  '/testing/', '/fixtures/', '/mocks/', '/e2e/', '/cypress/',
];

/**
 * Decide whether a path looks like test/spec code.
 *
 * A path qualifies when any of its directories is listed in
 * TEST_PATH_SEGMENTS, or when its file stem (basename without the final
 * extension) starts with `test_` / `test.` or ends with `.test`, `.spec`,
 * `_test` or `_spec`. All comparisons are case-insensitive.
 *
 * @param {string} filePath - Relative or absolute path; `/` and `\` are both
 *   accepted as separators.
 * @returns {boolean} true when the path matches one of the test conventions.
 */
function isTestFile(filePath) {
  // Normalise Windows separators first; otherwise neither the segment scan nor
  // a POSIX `basename` would recognise the directory structure.
  const normalized = filePath.replace(/\\/g, '/');

  // The leading '/' lets a first segment such as "tests/foo.js" match '/tests/'.
  const haystack = `/${normalized.toLowerCase()}`;
  if (TEST_PATH_SEGMENTS.some((segment) => haystack.includes(segment))) {
    return true;
  }

  const stem = basename(normalized).replace(/\.[^.]+$/, '').toLowerCase();
  return (
    stem.startsWith('test_') || stem.startsWith('test.') ||
    stem.endsWith('.test') || stem.endsWith('.spec') ||
    stem.endsWith('_test') || stem.endsWith('_spec')
  );
}
42
+
43
/**
 * Report whether a file should be ignored by orphan detection because it is
 * expected to have no import edges.
 *
 * A file is excluded when any of the following holds:
 *  - its lower-cased stem is listed in ORPHAN_EXCLUDED_BASENAMES,
 *  - its basename starts with '.' (dotfiles such as .eslintrc),
 *  - its path ends with '.d.ts' (type declaration file),
 *  - its stem ends with '.config' / '/config' or '.rc' (e.g. vite.config.ts),
 *  - isTestFile() classifies it as test/spec code.
 *
 * @param {string} filePath - Path of the file node.
 * @returns {boolean} true when the file is a likely false positive.
 */
function isOrphanFalsePositiveFile(filePath) {
  const fileName = basename(filePath);
  const stem = fileName.replace(/\.[^.]+$/, '').toLowerCase();

  // Compound config names: the stem still carries the ".config" / ".rc" part
  // because only the last extension was stripped above.
  const hasConfigSuffix = /[./]config$/i.test(stem) || /\.rc$/i.test(stem);

  return (
    ORPHAN_EXCLUDED_BASENAMES.has(stem) ||
    fileName.startsWith('.') ||
    filePath.endsWith('.d.ts') ||
    hasConfigSuffix ||
    isTestFile(filePath)
  );
}
63
+
64
// Exact (case-sensitive) symbol names treated as entry points, lifecycle
// hooks, or otherwise framework-called; orphan detection never reports them.
const FRAMEWORK_INVOKED_SYMBOLS = new Set([
  // Entry points
  'main',
  'run',
  'start',
  'serve',
  'handler',
  'execute',
  'app',
  // Fixture setup / teardown
  'setup',
  'teardown',
  'setUp',
  'tearDown',
  // Test-runner hooks
  'beforeAll',
  'afterAll',
  'beforeEach',
  'afterEach',
  'before',
  'after',
  // Construction and registration
  'constructor',
  'init',
  'initialize',
  'configure',
  'register',
  // Plugin / module plumbing
  'middleware',
  'plugin',
  'default',
  'module',
  'exports',
]);
72
+
73
/**
 * Detect if a function signature is likely a class/instance method rather
 * than a standalone function. Method calls (obj.method()) are intentionally
 * not tracked as references (too noisy without type info), so all methods
 * appear orphaned. We exclude them to avoid flooding the results.
 *
 * The decision is purely textual and depends on the file extension:
 *  - JS/TS (.js .mjs .cjs .jsx .ts .tsx): anything that does NOT start with
 *    the `function` keyword (optionally preceded by export/default/async) is
 *    treated as a method. Generator declarations (`function*`) count as
 *    standalone functions.
 *  - Python (.py): a first parameter named `self` or `cls`, bare or
 *    type-annotated, marks a method.
 *  - Any other extension: never treated as a method.
 *
 * @param {string} [signature] - Declaration text of the symbol.
 * @param {string} filePath - Path of the defining file; only the text after
 *   its last '.' is used.
 * @returns {boolean} true when the signature looks like a method. Returns
 *   false when the signature is empty/missing or filePath is not a string.
 */
function isLikelyMethod(signature, filePath) {
  if (!signature || typeof filePath !== 'string') return false;
  const s = signature.trimStart();

  const dotIndex = filePath.lastIndexOf('.');
  const ext = dotIndex === -1 ? '' : filePath.slice(dotIndex);

  // JS/TS: standalone functions always use the `function` keyword.
  // Class methods don't: `async ensure()`, `getGraph()`, `constructor()`.
  // Arrow functions assigned to vars are kind='variable', not 'function',
  // so they don't reach this check.
  if (['.js', '.mjs', '.cjs', '.jsx', '.ts', '.tsx'].includes(ext)) {
    // The lookahead accepts whitespace, '(' (anonymous default export) and
    // '*' (generator), but not identifier characters, so a method that is
    // merely *named* `functionFoo` is still reported as a method.
    return !/^(export\s+)?(default\s+)?(async\s+)?function(?=[\s(*])/.test(s);
  }

  // Python: methods have self or cls as first parameter, optionally followed
  // by a type annotation (`self: T`).
  if (ext === '.py') {
    return /\(\s*(self|cls)\s*[,):]/.test(s);
  }

  // Java/C#/Go: harder to detect without parent context — don't filter
  return false;
}
101
+
102
/**
 * Report whether a symbol should be ignored by orphan detection.
 *
 * The checks run in order and stop at the first hit:
 *  1. name is listed in FRAMEWORK_INVOKED_SYMBOLS,
 *  2. name is a Python dunder (`__x__`),
 *  3. name starts with `test_`, `Test`, `spec_` or `Spec`,
 *  4. name is at most two characters long,
 *  5. kind is 'function' and isLikelyMethod() flags the signature,
 *  6. the defining file is test code (isTestFile),
 *  7. the defining file is itself excluded (isOrphanFalsePositiveFile),
 *  8. name equals the file stem, ignoring case (likely the primary export).
 *
 * @param {string} name - Symbol name.
 * @param {string} kind - Symbol kind as stored on the graph node.
 * @param {string} filePath - Path of the file that defines the symbol.
 * @param {string} [signature] - Declaration text, forwarded to isLikelyMethod.
 * @returns {boolean} true when the symbol is a likely false positive.
 */
function isOrphanFalsePositiveSymbol(name, kind, filePath, signature) {
  const exclusionChecks = [
    // Entry points, lifecycle hooks, framework-called names
    () => FRAMEWORK_INVOKED_SYMBOLS.has(name),
    // Python dunders — called implicitly by the runtime
    () => name.startsWith('__') && name.endsWith('__'),
    // Test functions — called by test runners
    () => ['test_', 'Test', 'spec_', 'Spec'].some((prefix) => name.startsWith(prefix)),
    // Very short names — too generic, ambiguity cap probably suppressed real refs
    () => name.length <= 2,
    // Methods — obj.method() calls aren't tracked, so every method looks orphaned
    () => kind === 'function' && isLikelyMethod(signature, filePath),
    // Symbols in test files — all invoked by the test runner
    () => isTestFile(filePath),
    // Symbols in entry point / config files — reachable via runtime
    () => isOrphanFalsePositiveFile(filePath),
    // Symbol name matches the file stem — likely the primary export
    () => name.toLowerCase() === basename(filePath).replace(/\.[^.]+$/, '').toLowerCase(),
  ];

  return exclusionChecks.some((isExcluded) => isExcluded());
}
131
+
132
/**
 * Find file nodes in the graph that look similar to the given path.
 *
 * A file node is a candidate when, case-insensitively, its basename equals
 * the query basename, its basename contains the query basename, or its full
 * key contains the full query path. Exact basename matches are listed first so
 * that they are never pushed out by looser matches when the list is truncated;
 * within each group the graph's iteration order is kept.
 *
 * @param {object|null|undefined} graph - Graph exposing
 *   `forEachNode((key, attrs) => ...)`; only nodes with `attrs.type === 'file'`
 *   are considered.
 * @param {string} filePath - The path that was looked up and not found.
 * @param {number} [maxSuggestions=5] - Maximum number of suggestions.
 * @returns {string[]} Up to maxSuggestions node keys. Empty when the graph is
 *   missing or the query is not a non-empty string with a non-empty basename.
 */
function findSimilarFiles(graph, filePath, maxSuggestions = 5) {
  // A missing/empty query would either throw in basename() or match every
  // file (''.includes semantics), neither of which is a useful suggestion.
  if (!graph || typeof filePath !== 'string' || filePath === '') return [];

  const pathLower = filePath.toLowerCase();
  const baseLower = basename(filePath).toLowerCase();
  if (baseLower === '') return [];

  const exactMatches = [];
  const looseMatches = [];

  graph.forEachNode((node, attrs) => {
    if (attrs.type !== 'file') return;
    const nodeLower = node.toLowerCase();
    const nodeBase = basename(node).toLowerCase();
    if (nodeBase === baseLower) {
      exactMatches.push(node);
    } else if (nodeBase.includes(baseLower) || nodeLower.includes(pathLower)) {
      looseMatches.push(node);
    }
  });

  return [...exactMatches, ...looseMatches].slice(0, maxSuggestions);
}
155
+
6
156
  /**
7
157
  * Apply offset/limit pagination to an array.
8
158
  * Returns { items, total } where total is the unpaginated count.
@@ -79,12 +229,17 @@ class CodeIndex {
79
229
  * @returns {{content, shownFiles, shownSymbols, totalFiles, totalSymbols}|{total: number}}
80
230
  */
81
231
  async map({ focusFiles = [], offset, limit, count = false, structured = false } = {}) {
82
- await this._ensureReady();
232
+ const ensureResult = await this._ensureReady();
83
233
  const graph = this.cache.getGraph();
84
234
  if (!graph || graph.order === 0) {
85
- if (count) return { total: 0 };
86
- if (structured) return { files: [], shownFiles: 0, shownSymbols: 0, totalFiles: 0, totalSymbols: 0 };
87
- return { content: '(empty index)', shownFiles: 0, shownSymbols: 0, totalFiles: 0, totalSymbols: 0 };
235
+ const diagnostics = {
236
+ root: this.projectRoot,
237
+ filesScanned: ensureResult.totalScanned || 0,
238
+ extensions: this.cache.extensions.join(', '),
239
+ };
240
+ if (count) return { total: 0, diagnostics };
241
+ if (structured) return { files: [], shownFiles: 0, shownSymbols: 0, totalFiles: 0, totalSymbols: 0, diagnostics };
242
+ return { content: '(empty index)', shownFiles: 0, shownSymbols: 0, totalFiles: 0, totalSymbols: 0, diagnostics };
88
243
  }
89
244
 
90
245
  // Count totals from the graph
@@ -362,7 +517,11 @@ class CodeIndex {
362
517
  async dependencies({ file, offset, limit, count = false }) {
363
518
  await this._ensureReady();
364
519
  const graph = this.cache.getGraph();
365
- if (!graph || !graph.hasNode(file)) return count ? { total: 0 } : [];
520
+ if (!graph || !graph.hasNode(file)) {
521
+ const suggestions = findSimilarFiles(graph, file);
522
+ if (count) return { total: 0, fileNotFound: true, suggestions };
523
+ return { items: [], fileNotFound: true, suggestions };
524
+ }
366
525
 
367
526
  const fileScores = this._getFileScores();
368
527
 
@@ -396,7 +555,11 @@ class CodeIndex {
396
555
  async dependents({ file, offset, limit, count = false }) {
397
556
  await this._ensureReady();
398
557
  const graph = this.cache.getGraph();
399
- if (!graph || !graph.hasNode(file)) return count ? { total: 0 } : [];
558
+ if (!graph || !graph.hasNode(file)) {
559
+ const suggestions = findSimilarFiles(graph, file);
560
+ if (count) return { total: 0, fileNotFound: true, suggestions };
561
+ return { items: [], fileNotFound: true, suggestions };
562
+ }
400
563
 
401
564
  const fileScores = this._getFileScores();
402
565
 
@@ -450,9 +613,10 @@ class CodeIndex {
450
613
  await this._ensureReady();
451
614
  const graph = this.cache.getGraph();
452
615
  if (!graph || !graph.hasNode(file)) {
616
+ const suggestions = findSimilarFiles(graph, file);
453
617
  return count
454
- ? { totalFiles: 0, totalSymbols: 0, totalEdges: 0 }
455
- : { files: [], symbols: [], edges: [] };
618
+ ? { totalFiles: 0, totalSymbols: 0, totalEdges: 0, fileNotFound: true, suggestions }
619
+ : { files: [], symbols: [], edges: [], fileNotFound: true, suggestions };
456
620
  }
457
621
 
458
622
  // BFS over file nodes, following outgoing IMPORTS edges (dependencies)
@@ -597,6 +761,153 @@ class CodeIndex {
597
761
  };
598
762
  }
599
763
 
764
+ /**
765
+ * Find orphaned files or symbols — nodes with no cross-file connections.
766
+ *
767
+ * level='file': files with zero IMPORTS edges (neither importing nor imported).
768
+ * These are the "satellites" in the graph UI.
769
+ *
770
+ * level='symbol': symbols with no incoming REFERENCES from outside their own file.
771
+ * Dead code candidates — defined but never used cross-file.
772
+ *
773
+ * False positives (entry points, config files, test files, framework hooks,
774
+ * dunders, etc.) are excluded by default.
775
+ *
776
+ * @param {object} [opts]
777
+ * @param {'file'|'symbol'} [opts.level='file'] - Granularity
778
+ * @param {string} [opts.kind] - Filter symbols by kind (only for level='symbol')
779
+ * @param {number} [opts.offset] - Skip first N results
780
+ * @param {number} [opts.limit] - Max results to return
781
+ * @param {boolean} [opts.count=false] - If true, return only { total }
782
+ * @returns {Array|{total: number}}
783
+ */
784
+ async orphans({ level = 'file', kind, offset, limit, count = false } = {}) {
785
+ await this._ensureReady();
786
+ const graph = this.cache.getGraph();
787
+ if (!graph || graph.order === 0) return count ? { total: 0 } : [];
788
+
789
+ if (level === 'file') {
790
+ const results = [];
791
+ graph.forEachNode((node, attrs) => {
792
+ if (attrs.type !== 'file') return;
793
+
794
+ // Skip false positives: entry points, config, tests
795
+ if (isOrphanFalsePositiveFile(node)) return;
796
+
797
+ // Check for any IMPORTS edge (in or out)
798
+ let hasImport = false;
799
+ graph.forEachEdge(node, (_edge, edgeAttrs) => {
800
+ if (!hasImport && edgeAttrs.type === 'IMPORTS') hasImport = true;
801
+ });
802
+
803
+ if (!hasImport) {
804
+ results.push({ file: node, symbolCount: attrs.symbolCount || 0 });
805
+ }
806
+ });
807
+
808
+ // Meatier files first — more likely to be real orphans worth investigating
809
+ results.sort((a, b) => b.symbolCount - a.symbolCount);
810
+ if (count) return { total: results.length };
811
+ return paginate(results, { offset, limit }).items;
812
+ }
813
+
814
+ if (level === 'symbol') {
815
+ const results = [];
816
+ graph.forEachNode((node, attrs) => {
817
+ if (attrs.type !== 'symbol') return;
818
+ if (kind && attrs.kind !== kind) return;
819
+
820
+ // Skip false positives: framework hooks, dunders, test funcs, methods, etc.
821
+ if (isOrphanFalsePositiveSymbol(attrs.name, attrs.kind, attrs.file, attrs.signature)) return;
822
+
823
+ // Check for any incoming REFERENCES from a different file
824
+ let hasExternalRef = false;
825
+ graph.forEachInEdge(node, (_edge, edgeAttrs, source) => {
826
+ if (hasExternalRef) return;
827
+ if (edgeAttrs.type !== 'REFERENCES') return;
828
+ try {
829
+ const sourceFile = graph.getNodeAttribute(source, 'file') || source;
830
+ if (sourceFile !== attrs.file) hasExternalRef = true;
831
+ } catch {
832
+ if (source !== attrs.file) hasExternalRef = true;
833
+ }
834
+ });
835
+
836
+ if (!hasExternalRef) {
837
+ results.push({
838
+ name: attrs.name,
839
+ kind: attrs.kind,
840
+ file: attrs.file,
841
+ lineStart: attrs.lineStart,
842
+ signature: attrs.signature,
843
+ });
844
+ }
845
+ });
846
+
847
+ // Group by file, then by line within file
848
+ results.sort((a, b) => a.file.localeCompare(b.file) || a.lineStart - b.lineStart);
849
+ if (count) return { total: results.length };
850
+ return paginate(results, { offset, limit }).items;
851
+ }
852
+
853
+ throw new Error(`Unknown level: "${level}". Use "file" or "symbol".`);
854
+ }
855
+
856
+ /**
857
+ * File-level dependency graph for visualization.
858
+ * Returns nodes (files) ranked by PageRank and IMPORTS edges between them.
859
+ *
860
+ * @param {object} opts
861
+ * @param {number} [opts.limit] - Max nodes to return (default: 500)
862
+ * @returns {{ nodes: Array<{id, label, category, score}>, edges: Array<{source, target}> }}
863
+ */
864
+ async graph({ limit = 500 } = {}) {
865
+ await this._ensureReady();
866
+ const graph = this.cache.getGraph();
867
+ if (!graph || graph.order === 0) {
868
+ return { nodes: [], edges: [] };
869
+ }
870
+
871
+ const fileScores = this._getFileScores();
872
+
873
+ // Collect file nodes with scores, sorted by PageRank
874
+ const fileEntries = [];
875
+ graph.forEachNode((node, attrs) => {
876
+ if (attrs.type !== 'file') return;
877
+ fileEntries.push({ id: node, score: fileScores.get(node) || 0 });
878
+ });
879
+ fileEntries.sort((a, b) => b.score - a.score);
880
+
881
+ // Cap to limit
882
+ const capped = fileEntries.slice(0, limit);
883
+ const cappedSet = new Set(capped.map(f => f.id));
884
+
885
+ // Build nodes with category (first path segment) and label (filename)
886
+ const nodes = capped.map(f => {
887
+ const parts = f.id.split('/');
888
+ const category = parts.length > 1 ? parts[0] : 'root';
889
+ const label = parts[parts.length - 1].replace(/\.[^.]+$/, '');
890
+ return { id: f.id, label, category, score: f.score };
891
+ });
892
+
893
+ // Collect IMPORTS edges between capped files
894
+ const edges = [];
895
+ const edgeSet = new Set();
896
+ for (const f of capped) {
897
+ graph.forEachOutEdge(f.id, (_edge, attrs, source, target) => {
898
+ if (attrs.type !== 'IMPORTS') return;
899
+ if (!cappedSet.has(target)) return;
900
+ const key = `${source}->${target}`;
901
+ if (!edgeSet.has(key)) {
902
+ edgeSet.add(key);
903
+ edges.push({ source, target });
904
+ }
905
+ });
906
+ }
907
+
908
+ return { nodes, edges };
909
+ }
910
+
600
911
  /**
601
912
  * Force a full rebuild.
602
913
  */
package/src/server.js CHANGED
@@ -248,6 +248,33 @@ const routes = {
248
248
  json(res, result);
249
249
  },
250
250
 
251
+ 'GET /api/graph': async (req, res) => {
252
+ if (!requireIndex(res)) return;
253
+ const p = params(req.url);
254
+ const result = await currentIndex.graph({
255
+ limit: p.getInt('limit', 500),
256
+ });
257
+ json(res, result);
258
+ },
259
+
260
+ 'GET /api/orphans': async (req, res) => {
261
+ if (!requireIndex(res)) return;
262
+ const p = params(req.url);
263
+ const level = p.get('level', 'file');
264
+ const results = await currentIndex.orphans({
265
+ level,
266
+ kind: p.get('kind', undefined),
267
+ offset: p.getInt('offset', undefined),
268
+ limit: p.getInt('limit', 50),
269
+ });
270
+ const total = await currentIndex.orphans({
271
+ level,
272
+ kind: p.get('kind', undefined),
273
+ count: true,
274
+ });
275
+ json(res, { results, total: total.total });
276
+ },
277
+
251
278
  'GET /api/structure': async (req, res) => {
252
279
  if (!requireIndex(res)) return;
253
280
  const p = params(req.url);