mcp-vector-search 1.0.3__py3-none-any.whl → 1.1.22__py3-none-any.whl

This diff compares publicly available package versions released to a supported registry. It is provided for informational purposes only and reflects the changes between the versions as they appear in their public registry.
Files changed (63)
  1. mcp_vector_search/__init__.py +3 -3
  2. mcp_vector_search/analysis/__init__.py +48 -1
  3. mcp_vector_search/analysis/baseline/__init__.py +68 -0
  4. mcp_vector_search/analysis/baseline/comparator.py +462 -0
  5. mcp_vector_search/analysis/baseline/manager.py +621 -0
  6. mcp_vector_search/analysis/collectors/__init__.py +35 -0
  7. mcp_vector_search/analysis/collectors/cohesion.py +463 -0
  8. mcp_vector_search/analysis/collectors/coupling.py +1162 -0
  9. mcp_vector_search/analysis/collectors/halstead.py +514 -0
  10. mcp_vector_search/analysis/collectors/smells.py +325 -0
  11. mcp_vector_search/analysis/debt.py +516 -0
  12. mcp_vector_search/analysis/interpretation.py +685 -0
  13. mcp_vector_search/analysis/metrics.py +74 -1
  14. mcp_vector_search/analysis/reporters/__init__.py +3 -1
  15. mcp_vector_search/analysis/reporters/console.py +424 -0
  16. mcp_vector_search/analysis/reporters/markdown.py +480 -0
  17. mcp_vector_search/analysis/reporters/sarif.py +377 -0
  18. mcp_vector_search/analysis/storage/__init__.py +93 -0
  19. mcp_vector_search/analysis/storage/metrics_store.py +762 -0
  20. mcp_vector_search/analysis/storage/schema.py +245 -0
  21. mcp_vector_search/analysis/storage/trend_tracker.py +560 -0
  22. mcp_vector_search/analysis/trends.py +308 -0
  23. mcp_vector_search/analysis/visualizer/__init__.py +90 -0
  24. mcp_vector_search/analysis/visualizer/d3_data.py +534 -0
  25. mcp_vector_search/analysis/visualizer/exporter.py +484 -0
  26. mcp_vector_search/analysis/visualizer/html_report.py +2895 -0
  27. mcp_vector_search/analysis/visualizer/schemas.py +525 -0
  28. mcp_vector_search/cli/commands/analyze.py +665 -11
  29. mcp_vector_search/cli/commands/chat.py +193 -0
  30. mcp_vector_search/cli/commands/index.py +600 -2
  31. mcp_vector_search/cli/commands/index_background.py +467 -0
  32. mcp_vector_search/cli/commands/search.py +194 -1
  33. mcp_vector_search/cli/commands/setup.py +64 -13
  34. mcp_vector_search/cli/commands/status.py +302 -3
  35. mcp_vector_search/cli/commands/visualize/cli.py +26 -10
  36. mcp_vector_search/cli/commands/visualize/exporters/json_exporter.py +8 -4
  37. mcp_vector_search/cli/commands/visualize/graph_builder.py +167 -234
  38. mcp_vector_search/cli/commands/visualize/server.py +304 -15
  39. mcp_vector_search/cli/commands/visualize/templates/base.py +60 -6
  40. mcp_vector_search/cli/commands/visualize/templates/scripts.py +2100 -65
  41. mcp_vector_search/cli/commands/visualize/templates/styles.py +1297 -88
  42. mcp_vector_search/cli/didyoumean.py +5 -0
  43. mcp_vector_search/cli/main.py +16 -5
  44. mcp_vector_search/cli/output.py +134 -5
  45. mcp_vector_search/config/thresholds.py +89 -1
  46. mcp_vector_search/core/__init__.py +16 -0
  47. mcp_vector_search/core/database.py +39 -2
  48. mcp_vector_search/core/embeddings.py +24 -0
  49. mcp_vector_search/core/git.py +380 -0
  50. mcp_vector_search/core/indexer.py +445 -84
  51. mcp_vector_search/core/llm_client.py +9 -4
  52. mcp_vector_search/core/models.py +88 -1
  53. mcp_vector_search/core/relationships.py +473 -0
  54. mcp_vector_search/core/search.py +1 -1
  55. mcp_vector_search/mcp/server.py +795 -4
  56. mcp_vector_search/parsers/python.py +285 -5
  57. mcp_vector_search/utils/gitignore.py +0 -3
  58. {mcp_vector_search-1.0.3.dist-info → mcp_vector_search-1.1.22.dist-info}/METADATA +3 -2
  59. {mcp_vector_search-1.0.3.dist-info → mcp_vector_search-1.1.22.dist-info}/RECORD +62 -39
  60. mcp_vector_search/cli/commands/visualize.py.original +0 -2536
  61. {mcp_vector_search-1.0.3.dist-info → mcp_vector_search-1.1.22.dist-info}/WHEEL +0 -0
  62. {mcp_vector_search-1.0.3.dist-info → mcp_vector_search-1.1.22.dist-info}/entry_points.txt +0 -0
  63. {mcp_vector_search-1.0.3.dist-info → mcp_vector_search-1.1.22.dist-info}/licenses/LICENSE +0 -0
mcp_vector_search/analysis/storage/metrics_store.py (new file)
@@ -0,0 +1,762 @@
+"""SQLite-based metrics storage for historical tracking and trend analysis.
+
+This module provides the MetricsStore class for persisting code metrics to a
+SQLite database, enabling:
+- Historical tracking of file and project metrics
+- Trend analysis over time
+- Comparison between snapshots
+- Code smell tracking
+
+Design Decisions:
+    Storage Location: ~/.mcp-vector-search/metrics.db by default
+    - Centralized storage for cross-project analysis
+    - User can override with custom path
+    - Same pattern as baseline manager
+
+    Connection Pooling: Single connection with row factory
+    - SQLite doesn't benefit from pooling (single writer)
+    - Row factory enables dict-like access to results
+    - Connection reused across operations
+
+    Transaction Strategy: Auto-commit with explicit transactions for batches
+    - Individual saves use auto-commit for simplicity
+    - Bulk operations use explicit transactions for performance
+
+Performance:
+    - Save file metrics: O(1), ~1-2ms per file
+    - Save project snapshot: O(1), ~5-10ms
+    - Get history: O(n) where n=limit, typically <50ms
+    - Get trends: O(k) where k=snapshots, aggregates in SQL
+
+Error Handling:
+    - IntegrityError: Duplicate entries (file + project + timestamp)
+    - OperationalError: Database locked or corrupted
+    - All errors logged with context, propagated to caller
+"""
+
+from __future__ import annotations
+
+import json
+import sqlite3
+import subprocess
+from dataclasses import dataclass
+from datetime import datetime, timedelta
+from pathlib import Path
+from typing import Any
+
+from loguru import logger
+
+from ...utils.version import get_version_string
+from ..metrics import FileMetrics, ProjectMetrics
+from .schema import INIT_SCHEMA_SQL, SCHEMA_VERSION, get_schema_version_query
+
+
+class MetricsStoreError(Exception):
+    """Base exception for metrics store errors."""
+
+    pass
+
+
+class DatabaseLockedError(MetricsStoreError):
+    """Database is locked by another process."""
+
+    pass
+
+
+class DuplicateEntryError(MetricsStoreError):
+    """Duplicate entry violates unique constraint."""
+
+    pass
+
+
+@dataclass
+class GitInfo:
+    """Git repository information for snapshot traceability.
+
+    Attributes:
+        commit: Git commit hash (full SHA-1)
+        branch: Current branch name (None if detached HEAD)
+        remote: Remote repository name (e.g., "origin")
+    """
+
+    commit: str | None = None
+    branch: str | None = None
+    remote: str | None = None
+
+
+@dataclass
+class ProjectSnapshot:
+    """Project-wide metric snapshot at a point in time.
+
+    Attributes:
+        snapshot_id: Unique snapshot identifier (database ID)
+        project_path: Absolute path to project root
+        timestamp: When snapshot was taken
+        total_files: Number of files analyzed
+        total_lines: Total lines across all files
+        total_functions: Total number of functions
+        total_classes: Total number of classes
+        avg_complexity: Average cognitive complexity
+        max_complexity: Maximum cognitive complexity
+        total_complexity: Sum of all cognitive complexity
+        total_smells: Total code smell count
+        avg_health_score: Average health score (0.0-1.0)
+        grade_distribution: Distribution of complexity grades (A-F)
+        git_commit: Git commit hash at time of snapshot
+        git_branch: Git branch at time of snapshot
+        tool_version: Version of mcp-vector-search used
+    """
+
+    snapshot_id: int
+    project_path: str
+    timestamp: datetime
+    total_files: int
+    total_lines: int
+    total_functions: int
+    total_classes: int
+    avg_complexity: float
+    max_complexity: int
+    total_complexity: int
+    total_smells: int
+    avg_health_score: float
+    grade_distribution: dict[str, int]
+    git_commit: str | None = None
+    git_branch: str | None = None
+    tool_version: str | None = None
+
+    @classmethod
+    def from_row(cls, row: sqlite3.Row) -> ProjectSnapshot:
+        """Create ProjectSnapshot from database row.
+
+        Args:
+            row: SQLite row with dict-like access
+
+        Returns:
+            ProjectSnapshot instance
+        """
+        return cls(
+            snapshot_id=row["id"],
+            project_path=row["project_path"],
+            timestamp=datetime.fromisoformat(row["timestamp"]),
+            total_files=row["total_files"],
+            total_lines=row["total_lines"],
+            total_functions=row["total_functions"],
+            total_classes=row["total_classes"],
+            avg_complexity=row["avg_complexity"],
+            max_complexity=row["max_complexity"],
+            total_complexity=row["total_complexity"],
+            total_smells=row["total_smells"],
+            avg_health_score=row["avg_health_score"],
+            grade_distribution=json.loads(row["grade_distribution"]),
+            git_commit=row["git_commit"] if row["git_commit"] else None,
+            git_branch=row["git_branch"] if row["git_branch"] else None,
+            tool_version=row["tool_version"] if row["tool_version"] else None,
+        )
+
+
+@dataclass
+class TrendData:
+    """Trend analysis data over time period.
+
+    Attributes:
+        project_path: Project being analyzed
+        period_days: Number of days in trend period
+        snapshots: List of snapshots in chronological order
+        complexity_trend: List of (timestamp, avg_complexity) tuples
+        smell_trend: List of (timestamp, total_smells) tuples
+        health_trend: List of (timestamp, avg_health_score) tuples
+        change_rate: Average daily change in complexity
+    """
+
+    project_path: str
+    period_days: int
+    snapshots: list[ProjectSnapshot]
+    complexity_trend: list[tuple[datetime, float]]
+    smell_trend: list[tuple[datetime, int]]
+    health_trend: list[tuple[datetime, float]]
+    change_rate: float
+
+    @property
+    def improving(self) -> bool:
+        """Check if trends are improving (complexity decreasing).
+
+        Returns:
+            True if average complexity is trending down
+        """
+        return self.change_rate < 0
+
+
+class MetricsStore:
+    """SQLite-based storage for code metrics history.
+
+    This class provides persistent storage of file and project metrics,
+    enabling historical tracking and trend analysis.
+
+    Storage Strategy:
+        - Default location: ~/.mcp-vector-search/metrics.db
+        - Single SQLite database with normalized schema
+        - Atomic writes with transactions
+        - Foreign key constraints for referential integrity
+
+    Thread Safety:
+        - SQLite uses database-level locking
+        - Safe for single-threaded CLI usage
+        - Not suitable for concurrent writes (would require connection pooling)
+
+    Example:
+        >>> store = MetricsStore()
+        >>> metrics = ProjectMetrics(project_root="/path/to/project")
+        >>> snapshot_id = store.save_project_snapshot(metrics)
+        >>> history = store.get_project_history("/path/to/project", limit=10)
+    """
+
+    def __init__(self, db_path: Path | None = None):
+        """Initialize metrics store with database connection.
+
+        Args:
+            db_path: Optional custom database path.
+                Defaults to ~/.mcp-vector-search/metrics.db
+
+        Raises:
+            MetricsStoreError: If database initialization fails
+        """
+        if db_path is None:
+            # Default storage location
+            storage_dir = Path.home() / ".mcp-vector-search"
+            storage_dir.mkdir(parents=True, exist_ok=True)
+            db_path = storage_dir / "metrics.db"
+
+        self.db_path = db_path.resolve()
+
+        # Initialize database connection
+        try:
+            self.conn = sqlite3.connect(
+                str(self.db_path),
+                timeout=30.0,  # 30 second timeout for locked database
+                check_same_thread=False,  # Allow usage across threads (with care)
+            )
+
+            # Enable dict-like row access
+            self.conn.row_factory = sqlite3.Row
+
+            # Enable foreign key constraints
+            self.conn.execute("PRAGMA foreign_keys = ON")
+
+            # Initialize schema if needed
+            self._init_schema()
+
+            logger.info(f"Initialized metrics store: {self.db_path}")
+
+        except sqlite3.Error as e:
+            logger.error(f"Failed to initialize metrics store: {e}")
+            raise MetricsStoreError(f"Database initialization failed: {e}") from e
+
+    def _init_schema(self) -> None:
+        """Initialize database schema if not exists.
+
+        Creates all tables and indexes defined in schema.py.
+        Idempotent - safe to call multiple times.
+
+        Raises:
+            MetricsStoreError: If schema creation fails
+        """
+        try:
+            cursor = self.conn.cursor()
+
+            # Execute all schema initialization statements
+            for sql in INIT_SCHEMA_SQL:
+                cursor.execute(sql)
+
+            self.conn.commit()
+
+            # Verify schema version
+            cursor.execute(get_schema_version_query())
+            row = cursor.fetchone()
+            if row:
+                version = row[0]
+                logger.debug(f"Database schema version: {version}")
+
+                if version != SCHEMA_VERSION:
+                    logger.warning(
+                        f"Schema version mismatch: {version} vs {SCHEMA_VERSION}"
+                    )
+
+        except sqlite3.Error as e:
+            logger.error(f"Failed to initialize schema: {e}")
+            raise MetricsStoreError(f"Schema initialization failed: {e}") from e
+
+    def save_project_snapshot(
+        self, metrics: ProjectMetrics, git_info: GitInfo | None = None
+    ) -> int:
+        """Save project-wide metrics snapshot.
+
+        Args:
+            metrics: ProjectMetrics to save
+            git_info: Optional git metadata (auto-detected if None)
+
+        Returns:
+            Snapshot ID (database primary key)
+
+        Raises:
+            DuplicateEntryError: If snapshot with same timestamp exists
+            MetricsStoreError: If database write fails
+
+        Performance: O(1), typically 5-10ms
+        """
+        # Auto-detect git info if not provided
+        if git_info is None:
+            git_info = self._get_git_info(Path(metrics.project_root))
+
+        # Compute grade distribution
+        grade_dist: dict[str, int] = {"A": 0, "B": 0, "C": 0, "D": 0, "F": 0}
+        for file_metrics in metrics.files.values():
+            for chunk in file_metrics.chunks:
+                grade_dist[chunk.complexity_grade] += 1
+
+        # Compute average health score
+        if metrics.files:
+            avg_health = sum(f.health_score for f in metrics.files.values()) / len(
+                metrics.files
+            )
+        else:
+            avg_health = 1.0
+
+        # Compute total smells
+        total_smells = sum(
+            len(chunk.smells)
+            for file_metrics in metrics.files.values()
+            for chunk in file_metrics.chunks
+        )
+
+        try:
+            cursor = self.conn.cursor()
+
+            cursor.execute(
+                """
+                INSERT INTO project_snapshots (
+                    project_path, timestamp, total_files, total_lines,
+                    total_functions, total_classes, avg_complexity,
+                    max_complexity, total_complexity, total_smells,
+                    avg_health_score, grade_distribution, git_commit,
+                    git_branch, git_remote, tool_version
+                ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
+                """,
+                (
+                    metrics.project_root,
+                    metrics.analyzed_at.isoformat(),
+                    metrics.total_files,
+                    metrics.total_lines,
+                    metrics.total_functions,
+                    metrics.total_classes,
+                    metrics.avg_file_complexity,
+                    max(
+                        (f.max_complexity for f in metrics.files.values()),
+                        default=0,
+                    ),
+                    sum(f.total_complexity for f in metrics.files.values()),
+                    total_smells,
+                    avg_health,
+                    json.dumps(grade_dist),
+                    git_info.commit,
+                    git_info.branch,
+                    git_info.remote,
+                    get_version_string(include_build=True),
+                ),
+            )
+
+            snapshot_id = cursor.lastrowid
+            self.conn.commit()
+
+            logger.info(
+                f"Saved project snapshot {snapshot_id} for {metrics.project_root}"
+            )
+
+            return snapshot_id
+
+        except sqlite3.IntegrityError as e:
+            logger.warning(f"Duplicate snapshot for {metrics.project_root}: {e}")
+            raise DuplicateEntryError(
+                f"Snapshot already exists for {metrics.project_root} "
+                f"at {metrics.analyzed_at}"
+            ) from e
+        except sqlite3.OperationalError as e:
+            if "locked" in str(e).lower():
+                raise DatabaseLockedError(f"Database is locked: {self.db_path}") from e
+            raise MetricsStoreError(f"Failed to save project snapshot: {e}") from e
+        except sqlite3.Error as e:
+            logger.error(f"Failed to save project snapshot: {e}")
+            raise MetricsStoreError(f"Database write failed: {e}") from e
+
+    def save_file_metrics(self, file_metrics: FileMetrics, snapshot_id: int) -> int:
+        """Save file-level metrics linked to a project snapshot.
+
+        Args:
+            file_metrics: FileMetrics to save
+            snapshot_id: Project snapshot ID (foreign key)
+
+        Returns:
+            File metrics ID (database primary key)
+
+        Raises:
+            DuplicateEntryError: If metrics for file + snapshot exists
+            MetricsStoreError: If database write fails
+
+        Performance: O(1), typically 1-2ms per file
+        """
+        # Compute aggregates if not already done
+        file_metrics.compute_aggregates()
+
+        # Count smells
+        smell_count = sum(len(chunk.smells) for chunk in file_metrics.chunks)
+
+        # Determine overall grade (worst grade across chunks)
+        if file_metrics.chunks:
+            grades = [chunk.complexity_grade for chunk in file_metrics.chunks]
+            worst_grade = max(grades, key=lambda g: "ABCDF".index(g))
+        else:
+            worst_grade = "A"
+
+        try:
+            cursor = self.conn.cursor()
+
+            cursor.execute(
+                """
+                INSERT INTO file_metrics (
+                    file_path, project_id, total_lines, code_lines,
+                    comment_lines, blank_lines, function_count, class_count,
+                    method_count, cognitive_complexity, cyclomatic_complexity,
+                    total_complexity, avg_complexity, max_complexity,
+                    smell_count, health_score, complexity_grade
+                ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
+                """,
+                (
+                    file_metrics.file_path,
+                    snapshot_id,
+                    file_metrics.total_lines,
+                    file_metrics.code_lines,
+                    file_metrics.comment_lines,
+                    file_metrics.blank_lines,
+                    file_metrics.function_count,
+                    file_metrics.class_count,
+                    file_metrics.method_count,
+                    sum(chunk.cognitive_complexity for chunk in file_metrics.chunks),
+                    sum(chunk.cyclomatic_complexity for chunk in file_metrics.chunks),
+                    file_metrics.total_complexity,
+                    file_metrics.avg_complexity,
+                    file_metrics.max_complexity,
+                    smell_count,
+                    file_metrics.health_score,
+                    worst_grade,
+                ),
+            )
+
+            file_id = cursor.lastrowid
+            self.conn.commit()
+
+            logger.debug(f"Saved file metrics {file_id} for {file_metrics.file_path}")
+
+            return file_id
+
+        except sqlite3.IntegrityError as e:
+            logger.warning(f"Duplicate file metrics for {file_metrics.file_path}: {e}")
+            raise DuplicateEntryError(
+                f"Metrics already exist for {file_metrics.file_path} "
+                f"in snapshot {snapshot_id}"
+            ) from e
+        except sqlite3.OperationalError as e:
+            if "locked" in str(e).lower():
+                raise DatabaseLockedError(f"Database is locked: {self.db_path}") from e
+            raise MetricsStoreError(f"Failed to save file metrics: {e}") from e
+        except sqlite3.Error as e:
+            logger.error(f"Failed to save file metrics: {e}")
+            raise MetricsStoreError(f"Database write failed: {e}") from e
+
+    def save_complete_snapshot(self, metrics: ProjectMetrics) -> int:
+        """Save complete snapshot (project + all files) in single transaction.
+
+        This is the recommended method for saving metrics as it ensures
+        atomicity and better performance through batching.
+
+        Args:
+            metrics: ProjectMetrics with all file metrics
+
+        Returns:
+            Snapshot ID
+
+        Raises:
+            MetricsStoreError: If save fails (rolls back entire transaction)
+
+        Performance: O(n) where n=number of files, typically 50-100ms for 100 files
+        """
+        try:
+            # Begin explicit transaction
+            self.conn.execute("BEGIN")
+
+            # Save project snapshot
+            snapshot_id = self.save_project_snapshot(metrics)
+
+            # Save all file metrics
+            for file_metrics in metrics.files.values():
+                self.save_file_metrics(file_metrics, snapshot_id)
+
+            # Commit transaction
+            self.conn.commit()
+
+            logger.info(
+                f"Saved complete snapshot {snapshot_id} with {len(metrics.files)} files"
+            )
+
+            return snapshot_id
+
+        except Exception as e:
+            # Rollback on any error
+            self.conn.rollback()
+            logger.error(f"Failed to save complete snapshot: {e}")
+            raise MetricsStoreError(f"Failed to save snapshot: {e}") from e
+
+    def get_file_history(self, file_path: str, limit: int = 10) -> list[FileMetrics]:
+        """Get historical metrics for a specific file.
+
+        Args:
+            file_path: Path to file (relative or absolute)
+            limit: Maximum number of history entries to return
+
+        Returns:
+            List of FileMetrics ordered by timestamp (newest first)
+
+        Performance: O(n) where n=limit, typically <50ms
+        """
+        try:
+            cursor = self.conn.cursor()
+
+            cursor.execute(
+                """
+                SELECT * FROM file_metrics
+                WHERE file_path = ?
+                ORDER BY timestamp DESC
+                LIMIT ?
+                """,
+                (file_path, limit),
+            )
+
+            rows = cursor.fetchall()
+
+            # Convert rows to FileMetrics
+            # Note: This is a simplified conversion without chunk details
+            # For full chunk history, need separate chunk storage table
+            history = []
+            for row in rows:
+                fm = FileMetrics(
+                    file_path=row["file_path"],
+                    total_lines=row["total_lines"],
+                    code_lines=row["code_lines"],
+                    comment_lines=row["comment_lines"],
+                    blank_lines=row["blank_lines"],
+                    function_count=row["function_count"],
+                    class_count=row["class_count"],
+                    method_count=row["method_count"],
+                    total_complexity=row["total_complexity"],
+                    avg_complexity=row["avg_complexity"],
+                    max_complexity=row["max_complexity"],
+                    chunks=[],  # Chunk history not stored yet
+                )
+                history.append(fm)
+
+            logger.debug(f"Retrieved {len(history)} history entries for {file_path}")
+
+            return history
+
+        except sqlite3.Error as e:
+            logger.error(f"Failed to get file history: {e}")
+            raise MetricsStoreError(f"Database query failed: {e}") from e
+
+    def get_project_history(
+        self, project_path: str, limit: int = 10
+    ) -> list[ProjectSnapshot]:
+        """Get historical snapshots for a project.
+
+        Args:
+            project_path: Path to project root
+            limit: Maximum number of snapshots to return
+
+        Returns:
+            List of ProjectSnapshot ordered by timestamp (newest first)
+
+        Performance: O(n) where n=limit, typically <50ms
+        """
+        try:
+            cursor = self.conn.cursor()
+
+            cursor.execute(
+                """
+                SELECT * FROM project_snapshots
+                WHERE project_path = ?
+                ORDER BY timestamp DESC
+                LIMIT ?
+                """,
+                (project_path, limit),
+            )
+
+            rows = cursor.fetchall()
+
+            snapshots = [ProjectSnapshot.from_row(row) for row in rows]
+
+            logger.debug(f"Retrieved {len(snapshots)} snapshots for {project_path}")
+
+            return snapshots
+
+        except sqlite3.Error as e:
+            logger.error(f"Failed to get project history: {e}")
+            raise MetricsStoreError(f"Database query failed: {e}") from e
+
+    def get_trends(self, project_path: str, days: int = 30) -> TrendData:
+        """Analyze complexity trends over time period.
+
+        Args:
+            project_path: Path to project root
+            days: Number of days to analyze (from now backwards)
+
+        Returns:
+            TrendData with analyzed trends
+
+        Performance: O(k) where k=snapshots in period, typically <100ms
+
+        Example:
+            >>> trends = store.get_trends("/path/to/project", days=30)
+            >>> if trends.improving:
+            ...     print("Complexity is trending down!")
+        """
+        cutoff_date = datetime.now() - timedelta(days=days)
+
+        try:
+            cursor = self.conn.cursor()
+
+            cursor.execute(
+                """
+                SELECT * FROM project_snapshots
+                WHERE project_path = ?
+                AND timestamp >= ?
+                ORDER BY timestamp ASC
+                """,
+                (project_path, cutoff_date.isoformat()),
+            )
+
+            rows = cursor.fetchall()
+
+            snapshots = [ProjectSnapshot.from_row(row) for row in rows]
+
+            # Extract trend data
+            complexity_trend = [(s.timestamp, s.avg_complexity) for s in snapshots]
+
+            smell_trend = [(s.timestamp, s.total_smells) for s in snapshots]
+
+            health_trend = [(s.timestamp, s.avg_health_score) for s in snapshots]
+
+            # Compute change rate (complexity per day)
+            if len(snapshots) >= 2:
+                first = snapshots[0]
+                last = snapshots[-1]
+                time_delta = (last.timestamp - first.timestamp).days
+                complexity_delta = last.avg_complexity - first.avg_complexity
+
+                if time_delta > 0:
+                    change_rate = complexity_delta / time_delta
+                else:
+                    change_rate = 0.0
+            else:
+                change_rate = 0.0
+
+            logger.debug(
+                f"Analyzed trends for {project_path}: "
+                f"{len(snapshots)} snapshots, change rate {change_rate:.4f}/day"
+            )
+
+            return TrendData(
+                project_path=project_path,
+                period_days=days,
+                snapshots=snapshots,
+                complexity_trend=complexity_trend,
+                smell_trend=smell_trend,
+                health_trend=health_trend,
+                change_rate=change_rate,
+            )
+
+        except sqlite3.Error as e:
+            logger.error(f"Failed to get trends: {e}")
+            raise MetricsStoreError(f"Database query failed: {e}") from e
+
+    def close(self) -> None:
+        """Close database connection.
+
+        Should be called when done using the store, or use as context manager.
+        """
+        if self.conn:
+            self.conn.close()
+            logger.debug("Closed metrics store connection")
+
+    def __enter__(self) -> MetricsStore:
+        """Context manager entry."""
+        return self
+
+    def __exit__(self, exc_type: Any, exc_val: Any, exc_tb: Any) -> None:
+        """Context manager exit - close connection."""
+        self.close()
+
+    def _get_git_info(self, project_root: Path) -> GitInfo:
+        """Extract git information from project repository.
+
+        Args:
+            project_root: Project root directory
+
+        Returns:
+            GitInfo with commit, branch, and remote (if available)
+
+        Note: Does not raise exceptions. Returns GitInfo with None values if git unavailable.
+        """
+        git_info = GitInfo()
+
+        try:
+            # Get commit hash
+            result = subprocess.run(
+                ["git", "rev-parse", "HEAD"],
+                cwd=project_root,
+                capture_output=True,
+                text=True,
+                check=True,
+                timeout=5,
+            )
+            git_info.commit = result.stdout.strip()
+
+            # Get branch name
+            result = subprocess.run(
+                ["git", "rev-parse", "--abbrev-ref", "HEAD"],
+                cwd=project_root,
+                capture_output=True,
+                text=True,
+                check=True,
+                timeout=5,
+            )
+            branch = result.stdout.strip()
+            git_info.branch = branch if branch != "HEAD" else None
+
+            # Get remote name (if exists)
+            result = subprocess.run(
+                ["git", "remote"],
+                cwd=project_root,
+                capture_output=True,
+                text=True,
+                check=True,
+                timeout=5,
+            )
+            remotes = result.stdout.strip().split("\n")
+            git_info.remote = remotes[0] if remotes and remotes[0] else None
+
+        except (
+            subprocess.CalledProcessError,
+            FileNotFoundError,
+            subprocess.TimeoutExpired,
+        ):
+            # Git not available or not a git repo
+            logger.debug("Git information unavailable")
+
+        return git_info
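
For readers skimming the diff, the docstrings above describe the intended workflow: build a ProjectMetrics object (normally populated by the analysis collectors elsewhere in this release), persist it with save_complete_snapshot, then query history or trends. The sketch below is illustrative only and not part of the published package; the bare ProjectMetrics constructor call follows the class docstring example, and the custom db_path is an arbitrary placeholder.

from pathlib import Path

from mcp_vector_search.analysis.metrics import ProjectMetrics
from mcp_vector_search.analysis.storage.metrics_store import (
    DatabaseLockedError,
    MetricsStore,
)

# In real use the analysis collectors populate ProjectMetrics; an instance with
# only project_root (as in the MetricsStore docstring) keeps this sketch short.
metrics = ProjectMetrics(project_root="/path/to/project")

with MetricsStore(db_path=Path("/tmp/example-metrics.db")) as store:
    try:
        snapshot_id = store.save_complete_snapshot(metrics)
    except DatabaseLockedError:
        raise  # another process holds the SQLite write lock; retry later

    # Newest-first snapshot history and a 30-day trend for the same project.
    history = store.get_project_history(metrics.project_root, limit=10)
    trends = store.get_trends(metrics.project_root, days=30)
    if trends.improving:  # change_rate < 0 means average complexity is falling
        print(f"Complexity falling at {trends.change_rate:.4f} per day")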