mcp-vector-search 1.0.3__py3-none-any.whl → 1.1.22__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (63) hide show
  1. mcp_vector_search/__init__.py +3 -3
  2. mcp_vector_search/analysis/__init__.py +48 -1
  3. mcp_vector_search/analysis/baseline/__init__.py +68 -0
  4. mcp_vector_search/analysis/baseline/comparator.py +462 -0
  5. mcp_vector_search/analysis/baseline/manager.py +621 -0
  6. mcp_vector_search/analysis/collectors/__init__.py +35 -0
  7. mcp_vector_search/analysis/collectors/cohesion.py +463 -0
  8. mcp_vector_search/analysis/collectors/coupling.py +1162 -0
  9. mcp_vector_search/analysis/collectors/halstead.py +514 -0
  10. mcp_vector_search/analysis/collectors/smells.py +325 -0
  11. mcp_vector_search/analysis/debt.py +516 -0
  12. mcp_vector_search/analysis/interpretation.py +685 -0
  13. mcp_vector_search/analysis/metrics.py +74 -1
  14. mcp_vector_search/analysis/reporters/__init__.py +3 -1
  15. mcp_vector_search/analysis/reporters/console.py +424 -0
  16. mcp_vector_search/analysis/reporters/markdown.py +480 -0
  17. mcp_vector_search/analysis/reporters/sarif.py +377 -0
  18. mcp_vector_search/analysis/storage/__init__.py +93 -0
  19. mcp_vector_search/analysis/storage/metrics_store.py +762 -0
  20. mcp_vector_search/analysis/storage/schema.py +245 -0
  21. mcp_vector_search/analysis/storage/trend_tracker.py +560 -0
  22. mcp_vector_search/analysis/trends.py +308 -0
  23. mcp_vector_search/analysis/visualizer/__init__.py +90 -0
  24. mcp_vector_search/analysis/visualizer/d3_data.py +534 -0
  25. mcp_vector_search/analysis/visualizer/exporter.py +484 -0
  26. mcp_vector_search/analysis/visualizer/html_report.py +2895 -0
  27. mcp_vector_search/analysis/visualizer/schemas.py +525 -0
  28. mcp_vector_search/cli/commands/analyze.py +665 -11
  29. mcp_vector_search/cli/commands/chat.py +193 -0
  30. mcp_vector_search/cli/commands/index.py +600 -2
  31. mcp_vector_search/cli/commands/index_background.py +467 -0
  32. mcp_vector_search/cli/commands/search.py +194 -1
  33. mcp_vector_search/cli/commands/setup.py +64 -13
  34. mcp_vector_search/cli/commands/status.py +302 -3
  35. mcp_vector_search/cli/commands/visualize/cli.py +26 -10
  36. mcp_vector_search/cli/commands/visualize/exporters/json_exporter.py +8 -4
  37. mcp_vector_search/cli/commands/visualize/graph_builder.py +167 -234
  38. mcp_vector_search/cli/commands/visualize/server.py +304 -15
  39. mcp_vector_search/cli/commands/visualize/templates/base.py +60 -6
  40. mcp_vector_search/cli/commands/visualize/templates/scripts.py +2100 -65
  41. mcp_vector_search/cli/commands/visualize/templates/styles.py +1297 -88
  42. mcp_vector_search/cli/didyoumean.py +5 -0
  43. mcp_vector_search/cli/main.py +16 -5
  44. mcp_vector_search/cli/output.py +134 -5
  45. mcp_vector_search/config/thresholds.py +89 -1
  46. mcp_vector_search/core/__init__.py +16 -0
  47. mcp_vector_search/core/database.py +39 -2
  48. mcp_vector_search/core/embeddings.py +24 -0
  49. mcp_vector_search/core/git.py +380 -0
  50. mcp_vector_search/core/indexer.py +445 -84
  51. mcp_vector_search/core/llm_client.py +9 -4
  52. mcp_vector_search/core/models.py +88 -1
  53. mcp_vector_search/core/relationships.py +473 -0
  54. mcp_vector_search/core/search.py +1 -1
  55. mcp_vector_search/mcp/server.py +795 -4
  56. mcp_vector_search/parsers/python.py +285 -5
  57. mcp_vector_search/utils/gitignore.py +0 -3
  58. {mcp_vector_search-1.0.3.dist-info → mcp_vector_search-1.1.22.dist-info}/METADATA +3 -2
  59. {mcp_vector_search-1.0.3.dist-info → mcp_vector_search-1.1.22.dist-info}/RECORD +62 -39
  60. mcp_vector_search/cli/commands/visualize.py.original +0 -2536
  61. {mcp_vector_search-1.0.3.dist-info → mcp_vector_search-1.1.22.dist-info}/WHEEL +0 -0
  62. {mcp_vector_search-1.0.3.dist-info → mcp_vector_search-1.1.22.dist-info}/entry_points.txt +0 -0
  63. {mcp_vector_search-1.0.3.dist-info → mcp_vector_search-1.1.22.dist-info}/licenses/LICENSE +0 -0
@@ -0,0 +1,463 @@
1
+ """LCOM4 cohesion metric collector.
2
+
3
+ LCOM4 (Lack of Cohesion of Methods version 4) measures class cohesion
4
+ by counting connected components in the method-attribute graph.
5
+
6
+ A class is cohesive when its methods work together using shared attributes.
7
+ LCOM4 counts how many disconnected groups of methods exist:
8
+ - LCOM4 = 1: Perfect cohesion (all methods connected)
9
+ - LCOM4 > 1: Poor cohesion (class should potentially be split)
10
+
11
+ Example:
12
+ # Cohesive class (LCOM4 = 1)
13
+ class GoodClass:
14
+ def method_a(self):
15
+ return self.x + self.y
16
+
17
+ def method_b(self):
18
+ return self.x * self.y # Shares x, y with method_a
19
+
20
+ # Incohesive class (LCOM4 = 2)
21
+ class BadClass:
22
+ def method_a(self):
23
+ return self.x + self.y # Group 1
24
+
25
+ def method_c(self):
26
+ return self.z + self.w # Group 2 (no shared attributes)
27
+ """
28
+
29
+ from __future__ import annotations
30
+
31
+ from dataclasses import dataclass, field
32
+ from pathlib import Path
33
+ from typing import TYPE_CHECKING
34
+
35
+ from loguru import logger
36
+
37
+ if TYPE_CHECKING:
38
+ from tree_sitter import Node
39
+
40
+
41
@dataclass
class MethodAttributeAccess:
    """Record of the instance attributes touched by a single method.

    Attributes:
        method_name: Identifier of the method this record describes.
        attributes: Names reached through ``self`` inside that method
            (e.g., {"x", "y"}); every instance starts with its own empty set.
    """

    method_name: str
    attributes: set[str] = field(default_factory=set)
52
+
53
+
54
@dataclass
class ClassCohesion:
    """Outcome of the LCOM4 computation for one class.

    Attributes:
        class_name: Identifier of the analysed class.
        lcom4: Count of connected components (1=cohesive, >1=incohesive).
        method_count: How many methods the class defines.
        attribute_count: How many distinct instance attributes were accessed.
        method_attributes: Per-method detail, keyed by method name, holding
            the attribute names that method accesses.
    """

    class_name: str
    lcom4: int
    method_count: int
    attribute_count: int
    method_attributes: dict[str, set[str]] = field(default_factory=dict)
71
+
72
+
73
@dataclass
class FileCohesion:
    """Aggregated cohesion figures for every class found in one file.

    Attributes:
        file_path: Location of the file that was analysed.
        classes: One ClassCohesion entry per analysed class.
        avg_lcom4: Mean LCOM4 over the entries in ``classes``.
        max_lcom4: Largest LCOM4 among them (the least cohesive class).
    """

    file_path: Path
    classes: list[ClassCohesion] = field(default_factory=list)
    avg_lcom4: float = 0.0
    max_lcom4: int = 0
88
+
89
+
90
class UnionFind:
    """Disjoint-set forest used to count connected components.

    Every tracked name starts in its own set; ``union`` merges sets and
    ``count_components`` reports how many distinct sets remain.

    Example:
        uf = UnionFind(["method_a", "method_b", "method_c"])
        uf.union("method_a", "method_b")  # Connect a and b
        uf.count_components()  # Returns 2 (groups: {a,b}, {c})
    """

    def __init__(self, items: list[str]) -> None:
        """Create one singleton set per item.

        Args:
            items: Method names to track
        """
        # Each name is initially its own representative with rank zero.
        self.parent = {name: name for name in items}
        self.rank = dict.fromkeys(items, 0)

    def find(self, item: str) -> str:
        """Return the representative of the set that holds ``item``.

        Every node visited on the way is re-pointed straight at the
        representative (path compression).

        Args:
            item: Method name to look up

        Returns:
            Root of the set containing item
        """
        # First pass: climb to the representative.
        root = item
        while self.parent[root] != root:
            root = self.parent[root]

        # Second pass: compress the path that was just walked.
        node = item
        while node != root:
            next_node = self.parent[node]
            self.parent[node] = root
            node = next_node

        return root

    def union(self, item1: str, item2: str) -> None:
        """Join the sets that hold ``item1`` and ``item2``.

        The root with the lower rank is attached beneath the other one; on a
        tie the first root wins and its rank grows by one.

        Args:
            item1: First method name
            item2: Second method name
        """
        keeper = self.find(item1)
        absorbed = self.find(item2)
        if keeper == absorbed:
            return  # Already in the same set

        if self.rank[keeper] < self.rank[absorbed]:
            keeper, absorbed = absorbed, keeper

        self.parent[absorbed] = keeper
        if self.rank[keeper] == self.rank[absorbed]:
            self.rank[keeper] += 1

    def count_components(self) -> int:
        """Report how many disjoint sets currently exist.

        Returns:
            Number of disjoint sets (LCOM4 value)
        """
        representatives = set()
        for member in self.parent:
            representatives.add(self.find(member))
        return len(representatives)
149
+
150
+
151
class LCOM4Calculator:
    """Calculate LCOM4 cohesion metric for Python classes.

    Algorithm:
        1. For each class, extract methods and their attribute accesses
        2. Build undirected graph: nodes=methods, edges=shared attributes
        3. Count connected components using Union-Find
        4. LCOM4 = number of components (1=cohesive, >1=potentially split)

    Example:
        calculator = LCOM4Calculator()
        result = calculator.calculate_file_cohesion(
            Path("my_file.py"),
            "class MyClass:\\n    def foo(self): return self.x\\n"
        )
        print(f"LCOM4: {result.classes[0].lcom4}")
    """

    def __init__(self) -> None:
        """Initialize LCOM4 calculator with tree-sitter parser."""
        self._parser = None
        self._language = None
        self._initialize_parser()

    def _initialize_parser(self) -> None:
        """Initialize Tree-sitter parser for Python.

        Failure is deliberately non-fatal: the error is logged and the parser
        is left as None, which makes calculate_file_cohesion return an empty
        result instead of raising.
        """
        try:
            from tree_sitter_language_pack import get_language, get_parser

            self._language = get_language("python")
            self._parser = get_parser("python")
            logger.debug("Python Tree-sitter parser initialized for LCOM4")
        except Exception as e:
            logger.warning(f"Tree-sitter initialization failed: {e}")
            self._parser = None
            self._language = None

    @staticmethod
    def _node_text(node: Node, content: str) -> str:
        """Return the source text covered by an AST node.

        The parser is fed ``bytes(content, "utf8")``, so a node's
        ``start_byte``/``end_byte`` index the UTF-8 bytes, not the characters
        of ``content``. Slicing the string with them goes wrong as soon as a
        non-ASCII character precedes the node.

        Args:
            node: AST node whose text is wanted
            content: Source code the tree was parsed from

        Returns:
            Decoded text of the node
        """
        raw = getattr(node, "text", None)
        if raw is None:
            # No node bytes available: slice the encoded source instead.
            raw = content.encode("utf8")[node.start_byte : node.end_byte]
        return raw.decode("utf8")

    def calculate_file_cohesion(self, file_path: Path, content: str) -> FileCohesion:
        """Calculate LCOM4 for all classes in a file.

        Args:
            file_path: Path to the file (for reporting)
            content: Source code content as string

        Returns:
            FileCohesion with per-class LCOM4 results. The result is empty
            (no classes, avg 0.0, max 0) when the parser is unavailable or no
            class with methods is found.
        """
        if not self._parser:
            logger.warning("Tree-sitter parser not available, returning empty result")
            return FileCohesion(file_path=file_path)

        tree = self._parser.parse(bytes(content, "utf8"))

        class_cohesions = []
        for class_node in self._find_classes(tree.root_node):
            cohesion = self._calculate_class_cohesion(class_node, content)
            if cohesion:
                class_cohesions.append(cohesion)

        # Calculate aggregate metrics
        if class_cohesions:
            avg_lcom4 = sum(c.lcom4 for c in class_cohesions) / len(class_cohesions)
            max_lcom4 = max(c.lcom4 for c in class_cohesions)
        else:
            avg_lcom4 = 0.0
            max_lcom4 = 0

        return FileCohesion(
            file_path=file_path,
            classes=class_cohesions,
            avg_lcom4=avg_lcom4,
            max_lcom4=max_lcom4,
        )

    def _find_classes(self, root: Node) -> list[Node]:
        """Find all class definitions in the AST.

        The whole tree is searched, so classes nested inside other classes or
        functions are included. An explicit stack is used instead of
        recursion so deeply nested trees cannot exhaust the interpreter's
        recursion limit.

        Args:
            root: Root AST node

        Returns:
            List of class_definition nodes in pre-order (source) order
        """
        classes = []
        pending = [root]
        while pending:
            node = pending.pop()
            if node.type == "class_definition":
                classes.append(node)
            # Reversed so the leftmost child is popped, and thus visited, first.
            pending.extend(reversed(node.children))
        return classes

    def _calculate_class_cohesion(
        self, class_node: Node, content: str
    ) -> ClassCohesion | None:
        """Calculate LCOM4 for a single class.

        Args:
            class_node: AST node for class definition
            content: Source code (for extracting text)

        Returns:
            ClassCohesion result, or None if class has no methods
        """
        class_name = self._get_class_name(class_node, content)
        methods = self._extract_methods(class_node)

        if not methods:
            logger.debug(f"Class {class_name} has no methods, skipping LCOM4")
            return None

        # Extract attribute accesses for each method. Static/class methods
        # and methods without any self access are left out of the graph.
        method_attributes: dict[str, set[str]] = {}
        for method_node in methods:
            method_name = self._get_method_name(method_node, content)
            if self._is_static_or_class_method(method_node):
                continue

            attributes = self._find_attribute_accesses(method_node, content)
            if method_name and attributes:
                method_attributes[method_name] = attributes

        if not method_attributes:
            # No method touches self: every method counts as its own component.
            return ClassCohesion(
                class_name=class_name,
                lcom4=len(methods),
                method_count=len(methods),
                attribute_count=0,
                method_attributes={},
            )

        # Calculate LCOM4 using connected components
        lcom4 = self._calculate_lcom4(method_attributes)

        # Count unique attributes
        all_attributes: set[str] = set().union(*method_attributes.values())

        return ClassCohesion(
            class_name=class_name,
            lcom4=lcom4,
            method_count=len(methods),
            attribute_count=len(all_attributes),
            method_attributes=method_attributes,
        )

    def _get_class_name(self, class_node: Node, content: str) -> str:
        """Extract class name from class definition node.

        Args:
            class_node: Class definition AST node
            content: Source code

        Returns:
            Class name or "UnknownClass"
        """
        name_node = class_node.child_by_field_name("name")
        if name_node:
            return self._node_text(name_node, content)
        return "UnknownClass"

    def _extract_methods(self, class_node: Node) -> list[Node]:
        """Extract method nodes from a class.

        Only direct children of the class body are considered; functions
        nested deeper are not returned.

        Args:
            class_node: Class definition AST node

        Returns:
            List of function_definition nodes that are methods
        """
        methods = []
        body = class_node.child_by_field_name("body")
        if not body:
            return methods

        for child in body.children:
            if child.type == "function_definition":
                methods.append(child)
            elif child.type == "decorated_definition":
                # Decorated methods: @decorator\ndef method(...)
                # Take the first function_definition wrapped inside.
                for subchild in child.children:
                    if subchild.type == "function_definition":
                        methods.append(subchild)
                        break

        return methods

    def _get_method_name(self, method_node: Node, content: str) -> str | None:
        """Extract method name from function definition.

        Args:
            method_node: Function definition AST node
            content: Source code

        Returns:
            Method name or None
        """
        name_node = method_node.child_by_field_name("name")
        if name_node:
            return self._node_text(name_node, content)
        return None

    def _is_static_or_class_method(self, method_node: Node) -> bool:
        """Check if method is @staticmethod or @classmethod.

        Args:
            method_node: Function definition AST node

        Returns:
            True if method is static or class method
        """
        # Decorators normally hang off the enclosing decorated_definition;
        # the method's own children are scanned too in case the tree is
        # shaped differently.
        candidates = []
        parent = method_node.parent
        if parent and parent.type == "decorated_definition":
            candidates.extend(parent.children)
        candidates.extend(method_node.children)

        for child in candidates:
            if child.type != "decorator":
                continue
            decorator_text = child.text.decode("utf-8")
            if "@staticmethod" in decorator_text or "@classmethod" in decorator_text:
                return True

        return False

    def _find_attribute_accesses(self, method_node: Node, content: str) -> set[str]:
        """Find all self.attribute accesses in a method.

        Every ``attribute`` node whose object is the bare identifier ``self``
        contributes its attribute name, wherever it occurs inside the method.
        The walk uses an explicit stack so deeply nested expressions cannot
        hit the recursion limit.

        Args:
            method_node: Function definition AST node
            content: Source code

        Returns:
            Set of attribute names accessed via self
        """
        attributes = set()
        pending = [method_node]
        while pending:
            node = pending.pop()
            if node.type == "attribute":
                obj_node = node.child_by_field_name("object")
                if (
                    obj_node
                    and obj_node.type == "identifier"
                    and self._node_text(obj_node, content) == "self"
                ):
                    attr_node = node.child_by_field_name("attribute")
                    if attr_node:
                        attributes.add(self._node_text(attr_node, content))
            pending.extend(node.children)
        return attributes

    def _calculate_lcom4(self, method_attributes: dict[str, set[str]]) -> int:
        """Calculate LCOM4 using connected components.

        Two methods are connected when they share at least one attribute.
        Rather than intersecting every pair of methods, each method is merged
        with the first method seen using each of its attributes; this yields
        the same components in time linear in the number of accesses.

        Args:
            method_attributes: Mapping of method names to their attributes

        Returns:
            LCOM4 value (number of connected components), 0 for empty input
        """
        if not method_attributes:
            return 0

        uf = UnionFind(list(method_attributes))

        # attribute name -> first method observed accessing it
        first_user: dict[str, str] = {}
        for method_name, attrs in method_attributes.items():
            for attr in attrs:
                owner = first_user.setdefault(attr, method_name)
                if owner != method_name:
                    uf.union(owner, method_name)

        return uf.count_components()