mcp-vector-search 0.12.6__py3-none-any.whl → 1.1.22__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (92) hide show
  1. mcp_vector_search/__init__.py +3 -3
  2. mcp_vector_search/analysis/__init__.py +111 -0
  3. mcp_vector_search/analysis/baseline/__init__.py +68 -0
  4. mcp_vector_search/analysis/baseline/comparator.py +462 -0
  5. mcp_vector_search/analysis/baseline/manager.py +621 -0
  6. mcp_vector_search/analysis/collectors/__init__.py +74 -0
  7. mcp_vector_search/analysis/collectors/base.py +164 -0
  8. mcp_vector_search/analysis/collectors/cohesion.py +463 -0
  9. mcp_vector_search/analysis/collectors/complexity.py +743 -0
  10. mcp_vector_search/analysis/collectors/coupling.py +1162 -0
  11. mcp_vector_search/analysis/collectors/halstead.py +514 -0
  12. mcp_vector_search/analysis/collectors/smells.py +325 -0
  13. mcp_vector_search/analysis/debt.py +516 -0
  14. mcp_vector_search/analysis/interpretation.py +685 -0
  15. mcp_vector_search/analysis/metrics.py +414 -0
  16. mcp_vector_search/analysis/reporters/__init__.py +7 -0
  17. mcp_vector_search/analysis/reporters/console.py +646 -0
  18. mcp_vector_search/analysis/reporters/markdown.py +480 -0
  19. mcp_vector_search/analysis/reporters/sarif.py +377 -0
  20. mcp_vector_search/analysis/storage/__init__.py +93 -0
  21. mcp_vector_search/analysis/storage/metrics_store.py +762 -0
  22. mcp_vector_search/analysis/storage/schema.py +245 -0
  23. mcp_vector_search/analysis/storage/trend_tracker.py +560 -0
  24. mcp_vector_search/analysis/trends.py +308 -0
  25. mcp_vector_search/analysis/visualizer/__init__.py +90 -0
  26. mcp_vector_search/analysis/visualizer/d3_data.py +534 -0
  27. mcp_vector_search/analysis/visualizer/exporter.py +484 -0
  28. mcp_vector_search/analysis/visualizer/html_report.py +2895 -0
  29. mcp_vector_search/analysis/visualizer/schemas.py +525 -0
  30. mcp_vector_search/cli/commands/analyze.py +1062 -0
  31. mcp_vector_search/cli/commands/chat.py +1455 -0
  32. mcp_vector_search/cli/commands/index.py +621 -5
  33. mcp_vector_search/cli/commands/index_background.py +467 -0
  34. mcp_vector_search/cli/commands/init.py +13 -0
  35. mcp_vector_search/cli/commands/install.py +597 -335
  36. mcp_vector_search/cli/commands/install_old.py +8 -4
  37. mcp_vector_search/cli/commands/mcp.py +78 -6
  38. mcp_vector_search/cli/commands/reset.py +68 -26
  39. mcp_vector_search/cli/commands/search.py +224 -8
  40. mcp_vector_search/cli/commands/setup.py +1184 -0
  41. mcp_vector_search/cli/commands/status.py +339 -5
  42. mcp_vector_search/cli/commands/uninstall.py +276 -357
  43. mcp_vector_search/cli/commands/visualize/__init__.py +39 -0
  44. mcp_vector_search/cli/commands/visualize/cli.py +292 -0
  45. mcp_vector_search/cli/commands/visualize/exporters/__init__.py +12 -0
  46. mcp_vector_search/cli/commands/visualize/exporters/html_exporter.py +33 -0
  47. mcp_vector_search/cli/commands/visualize/exporters/json_exporter.py +33 -0
  48. mcp_vector_search/cli/commands/visualize/graph_builder.py +647 -0
  49. mcp_vector_search/cli/commands/visualize/layout_engine.py +469 -0
  50. mcp_vector_search/cli/commands/visualize/server.py +600 -0
  51. mcp_vector_search/cli/commands/visualize/state_manager.py +428 -0
  52. mcp_vector_search/cli/commands/visualize/templates/__init__.py +16 -0
  53. mcp_vector_search/cli/commands/visualize/templates/base.py +234 -0
  54. mcp_vector_search/cli/commands/visualize/templates/scripts.py +4542 -0
  55. mcp_vector_search/cli/commands/visualize/templates/styles.py +2522 -0
  56. mcp_vector_search/cli/didyoumean.py +27 -2
  57. mcp_vector_search/cli/main.py +127 -160
  58. mcp_vector_search/cli/output.py +158 -13
  59. mcp_vector_search/config/__init__.py +4 -0
  60. mcp_vector_search/config/default_thresholds.yaml +52 -0
  61. mcp_vector_search/config/settings.py +12 -0
  62. mcp_vector_search/config/thresholds.py +273 -0
  63. mcp_vector_search/core/__init__.py +16 -0
  64. mcp_vector_search/core/auto_indexer.py +3 -3
  65. mcp_vector_search/core/boilerplate.py +186 -0
  66. mcp_vector_search/core/config_utils.py +394 -0
  67. mcp_vector_search/core/database.py +406 -94
  68. mcp_vector_search/core/embeddings.py +24 -0
  69. mcp_vector_search/core/exceptions.py +11 -0
  70. mcp_vector_search/core/git.py +380 -0
  71. mcp_vector_search/core/git_hooks.py +4 -4
  72. mcp_vector_search/core/indexer.py +632 -54
  73. mcp_vector_search/core/llm_client.py +756 -0
  74. mcp_vector_search/core/models.py +91 -1
  75. mcp_vector_search/core/project.py +17 -0
  76. mcp_vector_search/core/relationships.py +473 -0
  77. mcp_vector_search/core/scheduler.py +11 -11
  78. mcp_vector_search/core/search.py +179 -29
  79. mcp_vector_search/mcp/server.py +819 -9
  80. mcp_vector_search/parsers/python.py +285 -5
  81. mcp_vector_search/utils/__init__.py +2 -0
  82. mcp_vector_search/utils/gitignore.py +0 -3
  83. mcp_vector_search/utils/gitignore_updater.py +212 -0
  84. mcp_vector_search/utils/monorepo.py +66 -4
  85. mcp_vector_search/utils/timing.py +10 -6
  86. {mcp_vector_search-0.12.6.dist-info → mcp_vector_search-1.1.22.dist-info}/METADATA +184 -53
  87. mcp_vector_search-1.1.22.dist-info/RECORD +120 -0
  88. {mcp_vector_search-0.12.6.dist-info → mcp_vector_search-1.1.22.dist-info}/WHEEL +1 -1
  89. {mcp_vector_search-0.12.6.dist-info → mcp_vector_search-1.1.22.dist-info}/entry_points.txt +1 -0
  90. mcp_vector_search/cli/commands/visualize.py +0 -1467
  91. mcp_vector_search-0.12.6.dist-info/RECORD +0 -68
  92. {mcp_vector_search-0.12.6.dist-info → mcp_vector_search-1.1.22.dist-info}/licenses/LICENSE +0 -0
@@ -0,0 +1,514 @@
1
+ """Halstead complexity metrics collector for structural code analysis.
2
+
3
+ This module implements Halstead complexity measures, which quantify code
4
+ complexity based on the number and frequency of operators and operands.
5
+
6
+ Halstead Metrics:
7
+ - n1: Number of distinct operators
8
+ - n2: Number of distinct operands
9
+ - N1: Total number of operators
10
+ - N2: Total number of operands
11
+ - Vocabulary (n): n1 + n2
12
+ - Length (N): N1 + N2
13
+ - Volume (V): N × log₂(n) - Information content in bits
14
+ - Difficulty (D): (n1/2) × (N2/n2) - How hard to understand
15
+ - Effort (E): D × V - Mental effort required
16
+ - Time (T): E / 18 - Estimated programming time in seconds
17
+ - Bugs (B): V / 3000 - Estimated number of bugs
18
+
19
+ References:
20
+ Halstead, Maurice H. (1977). Elements of Software Science.
21
+ https://en.wikipedia.org/wiki/Halstead_complexity_measures
22
+ """
23
+
24
+ from __future__ import annotations
25
+
26
+ import math
27
+ from dataclasses import dataclass
28
+ from typing import TYPE_CHECKING, Any
29
+
30
+ from .base import CollectorContext, MetricCollector
31
+
32
+ if TYPE_CHECKING:
33
+ from tree_sitter import Node
34
+
35
+
36
@dataclass
class HalsteadMetrics:
    """Halstead complexity metrics for a code unit.

    Attributes:
        distinct_operators: Number of distinct operators (n1)
        distinct_operands: Number of distinct operands (n2)
        total_operators: Total number of operators (N1)
        total_operands: Total number of operands (N2)
        vocabulary: Program vocabulary (n = n1 + n2)
        length: Program length (N = N1 + N2)
        volume: Information content in bits (V = N × log₂(n))
        difficulty: How hard to understand (D = (n1/2) × (N2/n2))
        effort: Mental effort required (E = D × V)
        time_seconds: Estimated programming time (T = E / 18)
        estimated_bugs: Estimated number of bugs (B = V / 3000)

    Example:
        For the function ``def add(a, b): return a + b`` one plausible tally
        is about six distinct operators (``def``, ``return``, ``+``, ``(``,
        ``)``, ``,``) and three distinct operands (``add``, ``a``, ``b``)::

            metrics = HalsteadMetrics.from_counts(n1=6, n2=3, N1=6, N2=6)
            # Volume = 12 × log₂(9) ≈ 38 bits
    """

    # Raw counts
    distinct_operators: int  # n1
    distinct_operands: int  # n2
    total_operators: int  # N1
    total_operands: int  # N2

    # Derived metrics
    vocabulary: int  # n = n1 + n2
    length: int  # N = N1 + N2
    volume: float  # V = N × log₂(n)
    difficulty: float  # D = (n1/2) × (N2/n2)
    effort: float  # E = D × V
    time_seconds: float  # T = E / 18
    estimated_bugs: float  # B = V / 3000

    @classmethod
    def from_counts(
        cls,
        n1: int,
        n2: int,
        N1: int,  # noqa: N803 - Halstead notation uses uppercase N1, N2
        N2: int,  # noqa: N803 - Halstead notation uses uppercase N1, N2
    ) -> HalsteadMetrics:
        """Calculate all Halstead metrics from raw operator/operand counts.

        Degenerate inputs are handled without raising arithmetic errors:
        an empty vocabulary or zero length yields all-zero derived metrics,
        and ``n2 == 0`` yields a difficulty of ``0.0``.

        Args:
            n1: Number of distinct operators
            n2: Number of distinct operands
            N1: Total number of operators
            N2: Total number of operands

        Returns:
            HalsteadMetrics with all derived metrics calculated

        Raises:
            ValueError: If any count is negative. Without this check a
                negative vocabulary would surface as an opaque
                ``math domain error`` and a negative length would silently
                produce negative volume/effort/bug estimates.

        Example:
            >>> metrics = HalsteadMetrics.from_counts(6, 3, 6, 6)
            >>> metrics.vocabulary
            9
            >>> metrics.volume > 0
            True
        """
        counts = {"n1": n1, "n2": n2, "N1": N1, "N2": N2}
        negative = [label for label, value in counts.items() if value < 0]
        if negative:
            details = ", ".join(f"{label}={counts[label]}" for label in negative)
            raise ValueError(f"Halstead counts must be non-negative; got {details}")

        vocabulary = n1 + n2
        length = N1 + N2

        if vocabulary == 0 or length == 0:
            # Nothing was counted: avoid log2(0) and report zeros throughout.
            volume = 0.0
            difficulty = 0.0
        else:
            volume = length * math.log2(vocabulary)
            # No operands means the N2/n2 ratio is undefined; report 0.0.
            difficulty = (n1 / 2) * (N2 / n2) if n2 else 0.0

        effort = difficulty * volume

        return cls(
            distinct_operators=n1,
            distinct_operands=n2,
            total_operators=N1,
            total_operands=N2,
            vocabulary=vocabulary,
            length=length,
            volume=volume,
            difficulty=difficulty,
            effort=effort,
            # 18 is the Stroud number (psychological moments per second).
            time_seconds=effort / 18,
            # 3000 is Halstead's empirical constant for delivered bugs.
            estimated_bugs=volume / 3000,
        )
154
+
155
+
156
+ # Language-specific operator and operand definitions
157
+ # Maps programming language to sets of operators and node type categories
158
+
159
# Token texts that count as Halstead operators for Python source.
# Each category is written as a tuple and star-unpacked into one set literal.
PYTHON_OPERATORS = {
    # Arithmetic and matrix-multiplication operators
    *("+", "-", "*", "/", "//", "%", "**", "@"),
    # Comparison operators
    *("==", "!=", "<", ">", "<=", ">="),
    # Logical, identity and membership keywords
    *("and", "or", "not", "is", "in"),
    # Bitwise operators
    *("&", "|", "^", "~", "<<", ">>"),
    # Plain and augmented assignment
    *("=", "+=", "-=", "*=", "/=", "//=", "%=", "**="),
    *("&=", "|=", "^=", ">>=", "<<=", "@="),
    # Control-flow, definition and statement keywords
    *("if", "else", "elif", "for", "while", "with"),
    *("try", "except", "finally", "raise"),
    *("def", "class", "lambda", "return", "yield", "yield from"),
    *("import", "from", "as", "assert", "pass"),
    *("break", "continue", "global", "nonlocal", "del"),
    # Access, call and punctuation operators
    *(".", "[", "]", "(", ")", ",", ":", "->"),
}
241
+
242
# Tree-sitter node type names that are treated as operators regardless of
# the node's text.
PYTHON_OPERATOR_NODE_TYPES = {
    # Expression-level operators
    *("binary_operator", "unary_operator", "boolean_operator", "comparison_operator"),
    # Assignment forms
    *("assignment", "augmented_assignment"),
    # Branching and looping
    *("if_statement", "elif_clause", "else_clause", "for_statement", "while_statement"),
    # Definitions and value-producing statements
    *("function_definition", "class_definition", "lambda", "return_statement", "yield"),
    # Imports
    *("import_statement", "import_from_statement"),
    # Exception handling and context management
    *("try_statement", "except_clause", "finally_clause", "raise_statement", "with_statement"),
    # Simple statements
    *("assert_statement", "pass_statement", "break_statement", "continue_statement"),
    *("del_statement", "global_statement", "nonlocal_statement"),
    # Call and access expressions
    *("call", "subscript", "attribute"),
}
277
+
278
# Tree-sitter node type names that are treated as operands.
PYTHON_OPERAND_NODE_TYPES = {
    # Names
    "identifier",
    # Numeric literals
    *("integer", "float"),
    # String literals
    *("string", "concatenated_string", "formatted_string"),
    # Singleton constants
    *("true", "false", "none"),
}
289
+
290
+
291
class HalsteadCollector(MetricCollector):
    """Collects Halstead complexity metrics using tree-sitter AST traversal.

    The collector classifies every visited node as an operator, an operand,
    or neither, keeps running totals plus the sets of distinct operators and
    operands, and converts them into Halstead metrics in
    ``finalize_function``.

    Operators are language constructs that perform operations:
    - Arithmetic: +, -, *, /, etc.
    - Comparison: ==, !=, <, >, etc.
    - Logical: and, or, not
    - Control flow: if, for, while, try, etc.
    - Definitions: def, class, lambda
    - Access: ., [], ()

    Operands are the data being operated on:
    - Variable names (identifiers)
    - Literals (numbers, strings, booleans)
    - Constants (None, True, False)

    Only ``language == "python"`` is classified; nodes from any other
    language are counted as neither operator nor operand.

    Example:
        For the source::

            def calculate(x, y):
                if x > 0:
                    return x + y
                return 0

        Operators include: def, if, >, return (×2), +
        Operands include: calculate, x (×3), y, 0 (×2)

    Performance:
        Target: <2ms per file
        Scales linearly with AST node count
    """

    # Keyword spellings that must never be counted as operands, even if a
    # node carrying that text is typed as "identifier". Built once at class
    # creation instead of on every _is_operand call.
    _PYTHON_KEYWORDS = frozenset(
        {
            "if",
            "else",
            "elif",
            "for",
            "while",
            "with",
            "try",
            "except",
            "finally",
            "raise",
            "def",
            "class",
            "lambda",
            "return",
            "yield",
            "import",
            "from",
            "as",
            "assert",
            "pass",
            "break",
            "continue",
            "global",
            "nonlocal",
            "del",
            "and",
            "or",
            "not",
            "is",
            "in",
        }
    )

    def __init__(self) -> None:
        """Initialize Halstead metrics collector with empty counts."""
        self._operators: set[str] = set()
        self._operands: set[str] = set()
        self._total_operators = 0
        self._total_operands = 0

    @property
    def name(self) -> str:
        """Return collector identifier.

        Returns:
            Collector name "halstead"
        """
        return "halstead"

    def collect_node(self, node: Node, context: CollectorContext, depth: int) -> None:
        """Process AST node and count operators/operands.

        Classifies the node as an operator, operand, or neither, and updates
        the running counts accordingly. The operator check runs first, so a
        node that satisfies both tests is counted as an operator only.

        Args:
            node: Current tree-sitter AST node
            context: Shared context with language and source info
            depth: Current depth in AST (unused)
        """
        language = context.language
        node_type = node.type
        node_text = self._get_node_text(node, context.source_code)

        if self._is_operator(node_type, node_text, language):
            self._operators.add(self._operator_key(node_type, node_text, language))
            self._total_operators += 1
        elif self._is_operand(node_type, node_text, language):
            self._operands.add(node_text)
            self._total_operands += 1

    def finalize_function(
        self, node: Node, context: CollectorContext
    ) -> dict[str, Any]:
        """Return final Halstead metrics for completed function.

        Calculates all derived Halstead metrics from the accumulated
        operator and operand counts. Does not reset the collector.

        Args:
            node: Function definition node (not inspected here)
            context: Shared context (not inspected here)

        Returns:
            Dictionary with Halstead metrics:
            - halstead_volume: Information content in bits
            - halstead_difficulty: How hard to understand
            - halstead_effort: Mental effort required
            - halstead_bugs: Estimated number of bugs
            - halstead_n1: Distinct operators count
            - halstead_n2: Distinct operands count
            - halstead_N1: Total operators count
            - halstead_N2: Total operands count
        """
        metrics = HalsteadMetrics.from_counts(
            n1=len(self._operators),
            n2=len(self._operands),
            N1=self._total_operators,
            N2=self._total_operands,
        )

        return {
            "halstead_volume": metrics.volume,
            "halstead_difficulty": metrics.difficulty,
            "halstead_effort": metrics.effort,
            "halstead_bugs": metrics.estimated_bugs,
            "halstead_n1": metrics.distinct_operators,
            "halstead_n2": metrics.distinct_operands,
            "halstead_N1": metrics.total_operators,
            "halstead_N2": metrics.total_operands,
        }

    def reset(self) -> None:
        """Reset collector state for next function."""
        self._operators.clear()
        self._operands.clear()
        self._total_operators = 0
        self._total_operands = 0

    def _get_node_text(self, node: Node, source: bytes) -> str:
        """Extract text content from a tree-sitter node.

        Args:
            node: Tree-sitter AST node
            source: Raw source code as bytes. Currently unused (the text is
                read from ``node.text``); kept so the signature is unchanged.

        Returns:
            Decoded text content of the node, or "" when the node carries
            no text. Bytes that are not valid UTF-8 are replaced with
            U+FFFD rather than raising, so one badly encoded file cannot
            abort metric collection.
        """
        raw = node.text
        if not raw:
            return ""
        return raw.decode("utf-8", errors="replace")

    def _operator_key(self, node_type: str, node_text: str, language: str) -> str:
        """Return the identity under which an operator is de-duplicated.

        Composite operator nodes (e.g. ``if_statement``, ``call``,
        ``function_definition``) span their whole subtree, so their text is
        different for every occurrence. Keying them by text would make each
        occurrence a "distinct" operator and would retain entire function
        bodies in the set. They are keyed by node type instead; all other
        operators are keyed by their own text (e.g. ``+``, ``return``).

        Args:
            node_type: Tree-sitter node type
            node_text: Text content of the node
            language: Programming language identifier

        Returns:
            Key to store in the distinct-operator set
        """
        if language == "python" and node_type in PYTHON_OPERATOR_NODE_TYPES:
            return node_type
        return node_text

    def _is_operator(self, node_type: str, node_text: str, language: str) -> bool:
        """Check if node represents an operator.

        For Python, a node is an operator when its type is listed in
        ``PYTHON_OPERATOR_NODE_TYPES`` or its text is listed in
        ``PYTHON_OPERATORS``.

        Args:
            node_type: Tree-sitter node type
            node_text: Text content of the node
            language: Programming language identifier

        Returns:
            True if node is an operator, False otherwise (always False for
            languages other than "python")
        """
        if language != "python":
            return False
        return node_type in PYTHON_OPERATOR_NODE_TYPES or node_text in PYTHON_OPERATORS

    def _is_operand(self, node_type: str, node_text: str, language: str) -> bool:
        """Check if node represents an operand.

        For Python, a node is an operand when its type is listed in
        ``PYTHON_OPERAND_NODE_TYPES``, except that an ``identifier`` whose
        text is a keyword is rejected.

        Args:
            node_type: Tree-sitter node type
            node_text: Text content of the node
            language: Programming language identifier

        Returns:
            True if node is an operand, False otherwise (always False for
            languages other than "python")
        """
        if language != "python":
            return False

        # Identifiers are operands only if they are not keywords.
        if node_type == "identifier" and node_text in self._PYTHON_KEYWORDS:
            return False

        return node_type in PYTHON_OPERAND_NODE_TYPES