mcp-vector-search 1.0.3__py3-none-any.whl → 1.1.22__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (63)
  1. mcp_vector_search/__init__.py +3 -3
  2. mcp_vector_search/analysis/__init__.py +48 -1
  3. mcp_vector_search/analysis/baseline/__init__.py +68 -0
  4. mcp_vector_search/analysis/baseline/comparator.py +462 -0
  5. mcp_vector_search/analysis/baseline/manager.py +621 -0
  6. mcp_vector_search/analysis/collectors/__init__.py +35 -0
  7. mcp_vector_search/analysis/collectors/cohesion.py +463 -0
  8. mcp_vector_search/analysis/collectors/coupling.py +1162 -0
  9. mcp_vector_search/analysis/collectors/halstead.py +514 -0
  10. mcp_vector_search/analysis/collectors/smells.py +325 -0
  11. mcp_vector_search/analysis/debt.py +516 -0
  12. mcp_vector_search/analysis/interpretation.py +685 -0
  13. mcp_vector_search/analysis/metrics.py +74 -1
  14. mcp_vector_search/analysis/reporters/__init__.py +3 -1
  15. mcp_vector_search/analysis/reporters/console.py +424 -0
  16. mcp_vector_search/analysis/reporters/markdown.py +480 -0
  17. mcp_vector_search/analysis/reporters/sarif.py +377 -0
  18. mcp_vector_search/analysis/storage/__init__.py +93 -0
  19. mcp_vector_search/analysis/storage/metrics_store.py +762 -0
  20. mcp_vector_search/analysis/storage/schema.py +245 -0
  21. mcp_vector_search/analysis/storage/trend_tracker.py +560 -0
  22. mcp_vector_search/analysis/trends.py +308 -0
  23. mcp_vector_search/analysis/visualizer/__init__.py +90 -0
  24. mcp_vector_search/analysis/visualizer/d3_data.py +534 -0
  25. mcp_vector_search/analysis/visualizer/exporter.py +484 -0
  26. mcp_vector_search/analysis/visualizer/html_report.py +2895 -0
  27. mcp_vector_search/analysis/visualizer/schemas.py +525 -0
  28. mcp_vector_search/cli/commands/analyze.py +665 -11
  29. mcp_vector_search/cli/commands/chat.py +193 -0
  30. mcp_vector_search/cli/commands/index.py +600 -2
  31. mcp_vector_search/cli/commands/index_background.py +467 -0
  32. mcp_vector_search/cli/commands/search.py +194 -1
  33. mcp_vector_search/cli/commands/setup.py +64 -13
  34. mcp_vector_search/cli/commands/status.py +302 -3
  35. mcp_vector_search/cli/commands/visualize/cli.py +26 -10
  36. mcp_vector_search/cli/commands/visualize/exporters/json_exporter.py +8 -4
  37. mcp_vector_search/cli/commands/visualize/graph_builder.py +167 -234
  38. mcp_vector_search/cli/commands/visualize/server.py +304 -15
  39. mcp_vector_search/cli/commands/visualize/templates/base.py +60 -6
  40. mcp_vector_search/cli/commands/visualize/templates/scripts.py +2100 -65
  41. mcp_vector_search/cli/commands/visualize/templates/styles.py +1297 -88
  42. mcp_vector_search/cli/didyoumean.py +5 -0
  43. mcp_vector_search/cli/main.py +16 -5
  44. mcp_vector_search/cli/output.py +134 -5
  45. mcp_vector_search/config/thresholds.py +89 -1
  46. mcp_vector_search/core/__init__.py +16 -0
  47. mcp_vector_search/core/database.py +39 -2
  48. mcp_vector_search/core/embeddings.py +24 -0
  49. mcp_vector_search/core/git.py +380 -0
  50. mcp_vector_search/core/indexer.py +445 -84
  51. mcp_vector_search/core/llm_client.py +9 -4
  52. mcp_vector_search/core/models.py +88 -1
  53. mcp_vector_search/core/relationships.py +473 -0
  54. mcp_vector_search/core/search.py +1 -1
  55. mcp_vector_search/mcp/server.py +795 -4
  56. mcp_vector_search/parsers/python.py +285 -5
  57. mcp_vector_search/utils/gitignore.py +0 -3
  58. {mcp_vector_search-1.0.3.dist-info → mcp_vector_search-1.1.22.dist-info}/METADATA +3 -2
  59. {mcp_vector_search-1.0.3.dist-info → mcp_vector_search-1.1.22.dist-info}/RECORD +62 -39
  60. mcp_vector_search/cli/commands/visualize.py.original +0 -2536
  61. {mcp_vector_search-1.0.3.dist-info → mcp_vector_search-1.1.22.dist-info}/WHEEL +0 -0
  62. {mcp_vector_search-1.0.3.dist-info → mcp_vector_search-1.1.22.dist-info}/entry_points.txt +0 -0
  63. {mcp_vector_search-1.0.3.dist-info → mcp_vector_search-1.1.22.dist-info}/licenses/LICENSE +0 -0
@@ -62,6 +62,11 @@ class EnhancedDidYouMeanTyper(typer.Typer):
62
62
  if click_group is None:
63
63
  return None
64
64
 
65
+ # If click_group is an integer, it's an exit code from standalone_mode=False
66
+ # Return it as-is to preserve exit code propagation
67
+ if isinstance(click_group, int):
68
+ return click_group
69
+
65
70
  # Create enhanced DYM group with original group's properties
66
71
  enhanced_group = EnhancedDidYouMeanGroup(
67
72
  name=click_group.name,
@@ -81,7 +81,7 @@ explore unfamiliar projects, and integrate with AI coding tools via MCP.
81
81
  [bold cyan]MAIN COMMANDS:[/bold cyan]
82
82
  setup 🚀 Zero-config setup (indexes + configures MCP)
83
83
  search 🔍 Semantic search (finds code by meaning)
84
- chat 🤖 LLM-powered Q&A about your code (needs API key)
84
+ chat/ask 🤖 LLM-powered Q&A about your code (needs API key)
85
85
  status 📊 Show project status
86
86
  visualize 📊 Interactive code graph
87
87
 
@@ -94,7 +94,7 @@ explore unfamiliar projects, and integrate with AI coding tools via MCP.
94
94
  mcp-vector-search search "error handling"
95
95
  mcp-vector-search search --files "*.ts" "authentication"
96
96
  mcp-vector-search chat "where is the database configured?"
97
- mcp-vector-search chat "how does auth work in this project?"
97
+ mcp-vector-search ask "how does auth work in this project?"
98
98
 
99
99
  [bold cyan]MORE COMMANDS:[/bold cyan]
100
100
  install 📦 Install project and MCP integrations
@@ -166,6 +166,9 @@ app.add_typer(search_app, name="search", help="🔍 Search code semantically")
166
166
 
167
167
  # 7.5. CHAT - LLM-powered intelligent search
168
168
  app.add_typer(chat_app, name="chat", help="🤖 Ask questions about code with LLM")
169
+ app.add_typer(
170
+ chat_app, name="ask", help="🤖 Ask questions about code with LLM (alias for chat)"
171
+ )
169
172
 
170
173
  # 8. INDEX - Index codebase
171
174
  app.add_typer(index_app, name="index", help="📇 Index codebase for semantic search")
@@ -357,7 +360,11 @@ def cli_with_suggestions():
357
360
 
358
361
  try:
359
362
  # Call the app with standalone_mode=False to get exceptions instead of sys.exit
360
- app(standalone_mode=False)
363
+ # Capture return value - when standalone_mode=False, typer.Exit returns code instead of raising
364
+ exit_code = app(standalone_mode=False)
365
+ # Propagate non-zero exit codes (e.g., from --fail-on-smell quality gate)
366
+ if exit_code is not None and exit_code != 0:
367
+ sys.exit(exit_code)
361
368
  except click.UsageError as e:
362
369
  # Check if it's a "No such command" error
363
370
  if "No such command" in str(e):
@@ -395,8 +402,12 @@ def cli_with_suggestions():
395
402
  except click.Abort:
396
403
  # User interrupted (Ctrl+C)
397
404
  sys.exit(1)
398
- except (SystemExit, click.exceptions.Exit):
399
- # Re-raise system exits and typer.Exit
405
+ except (SystemExit, click.exceptions.Exit) as e:
406
+ # Re-raise system exits and typer.Exit with their exit codes
407
+ if hasattr(e, "exit_code"):
408
+ sys.exit(e.exit_code)
409
+ elif hasattr(e, "code"):
410
+ sys.exit(e.code if e.code is not None else 0)
400
411
  raise
401
412
  except Exception as e:
402
413
  # For other exceptions, show error and exit if verbose logging is enabled
@@ -24,6 +24,46 @@ from ..core.models import ProjectInfo, SearchResult
24
24
  console = Console()
25
25
 
26
26
 
27
+ def _get_grade_color(grade: str) -> str:
28
+ """Get color for complexity grade."""
29
+ grade_colors = {
30
+ "A": "green",
31
+ "B": "cyan",
32
+ "C": "yellow",
33
+ "D": "orange",
34
+ "F": "red",
35
+ }
36
+ return grade_colors.get(grade, "white")
37
+
38
+
39
+ def _get_complexity_color(complexity: int) -> str:
40
+ """Get color based on cognitive complexity value."""
41
+ if complexity <= 5:
42
+ return "green"
43
+ elif complexity <= 10:
44
+ return "cyan"
45
+ elif complexity <= 20:
46
+ return "yellow"
47
+ elif complexity <= 30:
48
+ return "orange"
49
+ else:
50
+ return "red"
51
+
52
+
53
+ def _get_quality_color(quality: int) -> str:
54
+ """Get color based on quality score (0-100)."""
55
+ if quality >= 80:
56
+ return "green"
57
+ elif quality >= 60:
58
+ return "cyan"
59
+ elif quality >= 40:
60
+ return "yellow"
61
+ elif quality >= 20:
62
+ return "orange"
63
+ else:
64
+ return "red"
65
+
66
+
27
67
  def setup_logging(level: str = "WARNING") -> None:
28
68
  """Setup structured logging with rich formatting.
29
69
 
@@ -113,8 +153,17 @@ def print_search_results(
113
153
  query: str,
114
154
  show_content: bool = True,
115
155
  max_content_lines: int = 10,
156
+ quality_weight: float = 0.0,
116
157
  ) -> None:
117
- """Print search results in a formatted display."""
158
+ """Print search results in a formatted display with quality-aware ranking.
159
+
160
+ Args:
161
+ results: List of search results
162
+ query: Original search query
163
+ show_content: Whether to show code content
164
+ max_content_lines: Maximum lines of code to show
165
+ quality_weight: Weight for quality ranking (0.0-1.0), used to show score breakdown
166
+ """
118
167
  if not results:
119
168
  print_warning(f"No results found for query: '{query}'")
120
169
  return
@@ -122,7 +171,14 @@ def print_search_results(
122
171
  console.print(
123
172
  f"\n[bold blue]Search Results for:[/bold blue] [green]'{query}'[/green]"
124
173
  )
125
- console.print(f"[dim]Found {len(results)} results[/dim]\n")
174
+
175
+ # Show quality ranking info if enabled
176
+ if quality_weight > 0.0:
177
+ console.print(
178
+ f"[dim]Found {len(results)} results (quality-aware ranking: {quality_weight:.0%} quality, {(1 - quality_weight):.0%} relevance)[/dim]\n"
179
+ )
180
+ else:
181
+ console.print(f"[dim]Found {len(results)} results[/dim]\n")
126
182
 
127
183
  for i, result in enumerate(results, 1):
128
184
  # Create result header
@@ -132,12 +188,85 @@ def print_search_results(
132
188
  if result.class_name:
133
189
  header += f" in [yellow]{result.class_name}[/yellow]"
134
190
 
135
- # Add location and similarity
191
+ # Add location
136
192
  location = f"[dim]{result.location}[/dim]"
137
- similarity = f"[green]{result.similarity_score:.2%}[/green]"
138
193
 
139
194
  console.print(f"{header}")
140
- console.print(f" {location} | Similarity: {similarity}")
195
+
196
+ # Build metadata line with quality metrics
197
+ metadata_parts = [location]
198
+
199
+ # Show score breakdown if quality ranking is enabled
200
+ if quality_weight > 0.0 and hasattr(result, "_original_similarity"):
201
+ # Quality-aware ranking: show relevance, quality, and combined
202
+ relevance_score = result._original_similarity
203
+ combined_score = result.similarity_score
204
+ quality_score = result.quality_score or 0
205
+
206
+ metadata_parts.append(f"Relevance: [cyan]{relevance_score:.2%}[/cyan]")
207
+ metadata_parts.append(
208
+ f"Quality: [{_get_quality_color(quality_score)}]{quality_score}[/{_get_quality_color(quality_score)}]"
209
+ )
210
+ metadata_parts.append(f"Combined: [green]{combined_score:.2%}[/green]")
211
+ else:
212
+ # Pure semantic search: show only similarity score
213
+ similarity = f"[green]{result.similarity_score:.2%}[/green]"
214
+ metadata_parts.append(f"Similarity: {similarity}")
215
+
216
+ console.print(f" {' | '.join(metadata_parts)}")
217
+
218
+ # Add quality indicator line if quality metrics are available and not shown in scores
219
+ if result.complexity_grade and quality_weight == 0.0:
220
+ # Show quality metrics when not using quality ranking
221
+ quality_indicators = []
222
+
223
+ grade_color = _get_grade_color(result.complexity_grade)
224
+ quality_indicators.append(
225
+ f"Grade: [{grade_color}]{result.complexity_grade}[/{grade_color}]"
226
+ )
227
+
228
+ if result.cognitive_complexity is not None:
229
+ complexity_color = _get_complexity_color(result.cognitive_complexity)
230
+ quality_indicators.append(
231
+ f"Complexity: [{complexity_color}]{result.cognitive_complexity}[/{complexity_color}]"
232
+ )
233
+
234
+ # Show quality indicator with check/cross
235
+ smell_count = result.smell_count or 0
236
+ if smell_count == 0:
237
+ console.print(
238
+ f" [green]✓[/green] {' | '.join(quality_indicators)} | No smells"
239
+ )
240
+ else:
241
+ # List smells if available
242
+ smells_text = f"{smell_count} smells"
243
+ if result.code_smells:
244
+ smell_names = ", ".join(result.code_smells[:3]) # Show first 3
245
+ if len(result.code_smells) > 3:
246
+ smell_names += f", +{len(result.code_smells) - 3} more"
247
+ smells_text = f"{smell_count} smells: [dim]{smell_names}[/dim]"
248
+
249
+ console.print(
250
+ f" [red]✗[/red] {' | '.join(quality_indicators)} | {smells_text}"
251
+ )
252
+ elif result.complexity_grade and quality_weight > 0.0:
253
+ # When using quality ranking, show simpler quality indicator
254
+ smell_count = result.smell_count or 0
255
+ if smell_count == 0:
256
+ console.print(
257
+ f" [green]✓[/green] Grade {result.complexity_grade}, No smells"
258
+ )
259
+ else:
260
+ smells_text = (
261
+ ", ".join(result.code_smells[:3])
262
+ if result.code_smells
263
+ else f"{smell_count} smells"
264
+ )
265
+ if result.code_smells and len(result.code_smells) > 3:
266
+ smells_text += f", +{len(result.code_smells) - 3} more"
267
+ console.print(
268
+ f" [red]✗[/red] Grade {result.complexity_grade}, {smells_text}"
269
+ )
141
270
 
142
271
  # Show code content if requested
143
272
  if show_content and result.content:
@@ -55,19 +55,50 @@ class SmellThresholds:
55
55
  # High complexity
56
56
  high_complexity: int = 15
57
57
 
58
- # God class (too many methods)
58
+ # God class (too many methods and lines)
59
59
  god_class_methods: int = 20
60
+ god_class_lines: int = 500
60
61
 
61
62
  # Feature envy (placeholder for future)
62
63
  feature_envy_external_calls: int = 5
63
64
 
64
65
 
66
@dataclass
class CouplingThresholds:
    """Thresholds for coupling and instability metrics.

    The integer bands split efferent/afferent coupling counts into
    low / moderate / high / very-high ranges; the instability thresholds
    additionally map to A-F grades and to the Stable / Balanced /
    Unstable categories.  All thresholds are inclusive upper bounds for
    their band.
    """

    # Efferent coupling (Ce) thresholds -- outgoing dependencies
    efferent_low: int = 3  # Low coupling (0-3 dependencies)
    efferent_moderate: int = 7  # Moderate coupling (4-7)
    efferent_high: int = 12  # High coupling (8-12)
    # Very high: 13+

    # Afferent coupling (Ca) thresholds -- incoming dependents
    afferent_low: int = 2  # Low coupling (0-2 dependents)
    afferent_moderate: int = 5  # Moderate coupling (3-5)
    afferent_high: int = 10  # High coupling (6-10)
    # Very high: 11+

    # Instability (I) thresholds for grades; I ranges 0.0 (stable) to
    # 1.0 (unstable) -- presumably the Martin instability metric
    # I = Ce / (Ce + Ca), computed elsewhere (TODO confirm).
    instability_a: float = 0.2  # A grade: very stable (0.0-0.2)
    instability_b: float = 0.4  # B grade: stable (0.2-0.4)
    instability_c: float = 0.6  # C grade: balanced (0.4-0.6)
    instability_d: float = 0.8  # D grade: unstable (0.6-0.8)
    # F grade: very unstable (0.8-1.0)

    # Category thresholds (coarser three-way split of the same scale)
    stable_max: float = 0.3  # Stable category (0.0-0.3)
    balanced_max: float = 0.7  # Balanced category (0.3-0.7)
    # Unstable category: 0.7-1.0
93
+
94
+
65
95
  @dataclass
66
96
  class ThresholdConfig:
67
97
  """Complete threshold configuration."""
68
98
 
69
99
  complexity: ComplexityThresholds = field(default_factory=ComplexityThresholds)
70
100
  smells: SmellThresholds = field(default_factory=SmellThresholds)
101
+ coupling: CouplingThresholds = field(default_factory=CouplingThresholds)
71
102
 
72
103
  # Quality gate settings
73
104
  fail_on_f_grade: bool = True
@@ -104,6 +135,7 @@ class ThresholdConfig:
104
135
  """
105
136
  complexity_data = data.get("complexity", {})
106
137
  smells_data = data.get("smells", {})
138
+ coupling_data = data.get("coupling", {})
107
139
 
108
140
  return cls(
109
141
  complexity=(
@@ -114,6 +146,11 @@ class ThresholdConfig:
114
146
  smells=(
115
147
  SmellThresholds(**smells_data) if smells_data else SmellThresholds()
116
148
  ),
149
+ coupling=(
150
+ CouplingThresholds(**coupling_data)
151
+ if coupling_data
152
+ else CouplingThresholds()
153
+ ),
117
154
  fail_on_f_grade=data.get("fail_on_f_grade", True),
118
155
  fail_on_smell_count=data.get("fail_on_smell_count", 10),
119
156
  warn_on_d_grade=data.get("warn_on_d_grade", True),
@@ -147,8 +184,23 @@ class ThresholdConfig:
147
184
  "deep_nesting_depth": self.smells.deep_nesting_depth,
148
185
  "high_complexity": self.smells.high_complexity,
149
186
  "god_class_methods": self.smells.god_class_methods,
187
+ "god_class_lines": self.smells.god_class_lines,
150
188
  "feature_envy_external_calls": self.smells.feature_envy_external_calls,
151
189
  },
190
+ "coupling": {
191
+ "efferent_low": self.coupling.efferent_low,
192
+ "efferent_moderate": self.coupling.efferent_moderate,
193
+ "efferent_high": self.coupling.efferent_high,
194
+ "afferent_low": self.coupling.afferent_low,
195
+ "afferent_moderate": self.coupling.afferent_moderate,
196
+ "afferent_high": self.coupling.afferent_high,
197
+ "instability_a": self.coupling.instability_a,
198
+ "instability_b": self.coupling.instability_b,
199
+ "instability_c": self.coupling.instability_c,
200
+ "instability_d": self.coupling.instability_d,
201
+ "stable_max": self.coupling.stable_max,
202
+ "balanced_max": self.coupling.balanced_max,
203
+ },
152
204
  "fail_on_f_grade": self.fail_on_f_grade,
153
205
  "fail_on_smell_count": self.fail_on_smell_count,
154
206
  "warn_on_d_grade": self.warn_on_d_grade,
@@ -183,3 +235,39 @@ class ThresholdConfig:
183
235
  return "D"
184
236
  else:
185
237
  return "F"
238
+
239
def get_instability_grade(self, instability: float) -> str:
    """Grade an instability value on an A-F scale.

    Args:
        instability: Instability value (0.0-1.0).

    Returns:
        Grade from "A" (most stable) to "F" (most unstable); each
        configured threshold is an inclusive upper bound for its grade.
    """
    cfg = self.coupling
    # Table-driven lookup: first band whose upper bound contains the
    # value wins; anything above the D bound falls through to F.
    graded_bounds = (
        ("A", cfg.instability_a),
        ("B", cfg.instability_b),
        ("C", cfg.instability_c),
        ("D", cfg.instability_d),
    )
    for grade, upper in graded_bounds:
        if instability <= upper:
            return grade
    return "F"
258
+
259
def get_stability_category(self, instability: float) -> str:
    """Classify an instability value into a coarse stability category.

    Args:
        instability: Instability value (0.0-1.0).

    Returns:
        "Stable" (up to stable_max), "Balanced" (up to balanced_max),
        or "Unstable" (above balanced_max).
    """
    cfg = self.coupling
    # Guard clause for the open-ended top band, then a two-way split.
    if instability > cfg.balanced_max:
        return "Unstable"
    return "Stable" if instability <= cfg.stable_max else "Balanced"
@@ -1 +1,17 @@
1
1
  """Core functionality for MCP Vector Search."""
2
+
3
+ from mcp_vector_search.core.git import (
4
+ GitError,
5
+ GitManager,
6
+ GitNotAvailableError,
7
+ GitNotRepoError,
8
+ GitReferenceError,
9
+ )
10
+
11
+ __all__ = [
12
+ "GitError",
13
+ "GitManager",
14
+ "GitNotAvailableError",
15
+ "GitNotRepoError",
16
+ "GitReferenceError",
17
+ ]
@@ -192,7 +192,7 @@ class ChromaVectorDatabase(VectorDatabase):
192
192
  except BaseException as init_error:
193
193
  # Re-raise system exceptions we should never catch
194
194
  if isinstance(
195
- init_error, (KeyboardInterrupt, SystemExit, GeneratorExit)
195
+ init_error, KeyboardInterrupt | SystemExit | GeneratorExit
196
196
  ):
197
197
  raise
198
198
 
@@ -244,7 +244,7 @@ class ChromaVectorDatabase(VectorDatabase):
244
244
  except BaseException as retry_error:
245
245
  # Re-raise system exceptions
246
246
  if isinstance(
247
- retry_error, (KeyboardInterrupt, SystemExit, GeneratorExit)
247
+ retry_error, KeyboardInterrupt | SystemExit | GeneratorExit
248
248
  ):
249
249
  raise
250
250
 
@@ -477,6 +477,33 @@ class ChromaVectorDatabase(VectorDatabase):
477
477
 
478
478
  if similarity >= similarity_threshold:
479
479
  # Document contains the original content (no metadata appended)
480
+ # Parse code smells from JSON if present
481
+ code_smells = []
482
+ if "code_smells" in metadata:
483
+ try:
484
+ code_smells = json.loads(metadata["code_smells"])
485
+ except (json.JSONDecodeError, TypeError):
486
+ code_smells = []
487
+
488
+ # Calculate quality score from metrics (0-100 scale)
489
+ quality_score = None
490
+ if (
491
+ "cognitive_complexity" in metadata
492
+ and "smell_count" in metadata
493
+ ):
494
+ # Simple quality score: penalize complexity and smells
495
+ complexity = metadata["cognitive_complexity"]
496
+ smells = metadata["smell_count"]
497
+
498
+ # Start with 100, penalize for complexity and smells
499
+ score = 100
500
+ # Complexity penalty: -2 points per complexity unit
501
+ score -= min(50, complexity * 2)
502
+ # Smell penalty: -10 points per smell
503
+ score -= min(30, smells * 10)
504
+
505
+ quality_score = max(0, score)
506
+
480
507
  result = SearchResult(
481
508
  content=doc,
482
509
  file_path=Path(metadata["file_path"]),
@@ -488,6 +515,16 @@ class ChromaVectorDatabase(VectorDatabase):
488
515
  chunk_type=metadata.get("chunk_type", "code"),
489
516
  function_name=metadata.get("function_name") or None,
490
517
  class_name=metadata.get("class_name") or None,
518
+ # Quality metrics from structural analysis
519
+ cognitive_complexity=metadata.get("cognitive_complexity"),
520
+ cyclomatic_complexity=metadata.get("cyclomatic_complexity"),
521
+ max_nesting_depth=metadata.get("max_nesting_depth"),
522
+ parameter_count=metadata.get("parameter_count"),
523
+ lines_of_code=metadata.get("lines_of_code"),
524
+ complexity_grade=metadata.get("complexity_grade"),
525
+ code_smells=code_smells,
526
+ smell_count=metadata.get("smell_count"),
527
+ quality_score=quality_score,
491
528
  )
492
529
  search_results.append(result)
493
530
 
@@ -2,8 +2,32 @@
2
2
 
3
3
  import hashlib
4
4
  import json
5
+ import multiprocessing
6
+ import os
5
7
  from pathlib import Path
6
8
 
9
+
10
+ # Configure tokenizers parallelism based on process context
11
+ # Enable parallelism in main process for 2-4x speedup
12
+ # Disable in forked processes to avoid deadlock warnings
13
+ # See: https://github.com/huggingface/tokenizers/issues/1294
14
+ def _configure_tokenizers_parallelism() -> None:
15
+ """Configure TOKENIZERS_PARALLELISM based on process context."""
16
+ # Check if we're in the main process
17
+ is_main_process = multiprocessing.current_process().name == "MainProcess"
18
+
19
+ if is_main_process:
20
+ # Enable parallelism in main process for better performance
21
+ # This gives 2-4x speedup for embedding generation
22
+ os.environ["TOKENIZERS_PARALLELISM"] = "true"
23
+ else:
24
+ # Disable in forked processes to avoid deadlock
25
+ os.environ["TOKENIZERS_PARALLELISM"] = "false"
26
+
27
+
28
+ # Configure before importing sentence_transformers
29
+ _configure_tokenizers_parallelism()
30
+
7
31
  import aiofiles
8
32
  from loguru import logger
9
33
  from sentence_transformers import SentenceTransformer