code-memory 1.0.17.tar.gz → 1.0.18.tar.gz
This diff shows the content of publicly available package versions released to a supported registry. It is provided for informational purposes only and reflects the changes between the two versions as they appear in that public registry.
- {code_memory-1.0.17 → code_memory-1.0.18}/PKG-INFO +1 -1
- {code_memory-1.0.17 → code_memory-1.0.18}/db.py +105 -0
- {code_memory-1.0.17 → code_memory-1.0.18}/logging_config.py +6 -6
- {code_memory-1.0.17 → code_memory-1.0.18}/parser.py +235 -53
- {code_memory-1.0.17 → code_memory-1.0.18}/pyproject.toml +1 -1
- {code_memory-1.0.17 → code_memory-1.0.18}/queries.py +107 -5
- {code_memory-1.0.17 → code_memory-1.0.18}/server.py +87 -11
- {code_memory-1.0.17 → code_memory-1.0.18}/tests/test_logging.py +6 -6
- {code_memory-1.0.17 → code_memory-1.0.18}/.github/workflows/ci.yml +0 -0
- {code_memory-1.0.17 → code_memory-1.0.18}/.github/workflows/publish.yml +0 -0
- {code_memory-1.0.17 → code_memory-1.0.18}/.github/workflows/release-binaries.yml +0 -0
- {code_memory-1.0.17 → code_memory-1.0.18}/.gitignore +0 -0
- {code_memory-1.0.17 → code_memory-1.0.18}/.python-version +0 -0
- {code_memory-1.0.17 → code_memory-1.0.18}/CHANGELOG.md +0 -0
- {code_memory-1.0.17 → code_memory-1.0.18}/CONTRIBUTING.md +0 -0
- {code_memory-1.0.17 → code_memory-1.0.18}/LICENSE +0 -0
- {code_memory-1.0.17 → code_memory-1.0.18}/Makefile +0 -0
- {code_memory-1.0.17 → code_memory-1.0.18}/README.md +0 -0
- {code_memory-1.0.17 → code_memory-1.0.18}/assets/logo.png +0 -0
- {code_memory-1.0.17 → code_memory-1.0.18}/code-memory.spec +0 -0
- {code_memory-1.0.17 → code_memory-1.0.18}/doc_parser.py +0 -0
- {code_memory-1.0.17 → code_memory-1.0.18}/errors.py +0 -0
- {code_memory-1.0.17 → code_memory-1.0.18}/git_search.py +0 -0
- {code_memory-1.0.17 → code_memory-1.0.18}/hooks/hook-sentence_transformers.py +0 -0
- {code_memory-1.0.17 → code_memory-1.0.18}/hooks/hook-sqlite_vec.py +0 -0
- {code_memory-1.0.17 → code_memory-1.0.18}/hooks/hook-tree_sitter.py +0 -0
- {code_memory-1.0.17 → code_memory-1.0.18}/hooks/hook-tree_sitter_languages.py +0 -0
- {code_memory-1.0.17 → code_memory-1.0.18}/prompts/milestone_1.xml +0 -0
- {code_memory-1.0.17 → code_memory-1.0.18}/prompts/milestone_2.xml +0 -0
- {code_memory-1.0.17 → code_memory-1.0.18}/prompts/milestone_3.xml +0 -0
- {code_memory-1.0.17 → code_memory-1.0.18}/prompts/milestone_4.xml +0 -0
- {code_memory-1.0.17 → code_memory-1.0.18}/prompts/milestone_5.xml +0 -0
- {code_memory-1.0.17 → code_memory-1.0.18}/prompts/milestone_6.xml +0 -0
- {code_memory-1.0.17 → code_memory-1.0.18}/tests/__init__.py +0 -0
- {code_memory-1.0.17 → code_memory-1.0.18}/tests/conftest.py +0 -0
- {code_memory-1.0.17 → code_memory-1.0.18}/tests/test_errors.py +0 -0
- {code_memory-1.0.17 → code_memory-1.0.18}/tests/test_tools.py +0 -0
- {code_memory-1.0.17 → code_memory-1.0.18}/tests/test_validation.py +0 -0
- {code_memory-1.0.17 → code_memory-1.0.18}/uv.lock +0 -0
- {code_memory-1.0.17 → code_memory-1.0.18}/validation.py +0 -0
```diff
--- code_memory-1.0.17/PKG-INFO
+++ code_memory-1.0.18/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: code-memory
-Version: 1.0.17
+Version: 1.0.18
 Summary: A deterministic, high-precision code intelligence MCP server
 Project-URL: Homepage, https://github.com/kapillamba4/code-memory
 Project-URL: Documentation, https://github.com/kapillamba4/code-memory#readme
```
```diff
--- code_memory-1.0.17/db.py
+++ code_memory-1.0.18/db.py
@@ -638,3 +638,108 @@ def upsert_doc_embedding(
         )
     if auto_commit:
         db.commit()
+
+
+# ---------------------------------------------------------------------------
+# Index Statistics
+# ---------------------------------------------------------------------------
+
+def get_index_stats(db: sqlite3.Connection, project_dir: str) -> dict:
+    """Get comprehensive statistics about the index.
+
+    Args:
+        db: An open sqlite3.Connection.
+        project_dir: The project directory path.
+
+    Returns:
+        Dictionary with index health metrics including:
+        - Total symbols, files, doc chunks indexed
+        - Index freshness (last indexed timestamps)
+        - Embedding model info and dimension
+        - Database size and WAL status
+    """
+    import os
+
+    # Get counts
+    symbols_count = db.execute("SELECT COUNT(*) FROM symbols").fetchone()[0]
+    files_count = db.execute("SELECT COUNT(*) FROM files").fetchone()[0]
+    doc_chunks_count = db.execute("SELECT COUNT(*) FROM doc_chunks").fetchone()[0]
+    doc_files_count = db.execute("SELECT COUNT(*) FROM doc_files").fetchone()[0]
+    references_count = db.execute("SELECT COUNT(*) FROM references_").fetchone()[0]
+    symbol_embeddings_count = db.execute("SELECT COUNT(*) FROM symbol_embeddings").fetchone()[0]
+    doc_embeddings_count = db.execute("SELECT COUNT(*) FROM doc_embeddings").fetchone()[0]
+
+    # Get symbol kinds distribution
+    symbol_kinds = dict(db.execute(
+        "SELECT kind, COUNT(*) FROM symbols GROUP BY kind ORDER BY COUNT(*) DESC"
+    ).fetchall())
+
+    # Get file types distribution (by extension)
+    file_extensions = dict(db.execute(
+        """SELECT substr(path, instr(path, '.')) as ext, COUNT(*) as cnt
+           FROM files
+           WHERE path LIKE '%.%'
+           GROUP BY ext
+           ORDER BY cnt DESC
+           LIMIT 10"""
+    ).fetchall())
+
+    # Get last indexed timestamps
+    last_file_indexed = db.execute(
+        "SELECT MAX(last_modified) FROM files"
+    ).fetchone()[0]
+    last_doc_indexed = db.execute(
+        "SELECT MAX(last_modified) FROM doc_files"
+    ).fetchone()[0]
+
+    # Get embedding model info
+    embedding_model = db.execute(
+        "SELECT value FROM index_metadata WHERE key = 'embedding_model'"
+    ).fetchone()
+    embedding_dim = db.execute(
+        "SELECT value FROM index_metadata WHERE key = 'embedding_dim'"
+    ).fetchone()
+
+    # Database file size
+    db_path = os.path.join(os.path.abspath(project_dir), "code_memory.db")
+    db_size_bytes = os.path.getsize(db_path) if os.path.exists(db_path) else 0
+    db_size_mb = round(db_size_bytes / (1024 * 1024), 2)
+
+    # WAL status
+    wal_path = db_path + "-wal"
+    wal_exists = os.path.exists(wal_path)
+    wal_size_mb = round(os.path.getsize(wal_path) / (1024 * 1024), 2) if wal_exists else 0
+
+    # Check journal mode
+    journal_mode = db.execute("PRAGMA journal_mode").fetchone()[0]
+
+    return {
+        "indexed": symbols_count > 0 or doc_chunks_count > 0,
+        "counts": {
+            "symbols": symbols_count,
+            "files": files_count,
+            "doc_chunks": doc_chunks_count,
+            "doc_files": doc_files_count,
+            "references": references_count,
+            "symbol_embeddings": symbol_embeddings_count,
+            "doc_embeddings": doc_embeddings_count,
+        },
+        "distributions": {
+            "symbol_kinds": symbol_kinds,
+            "file_extensions": file_extensions,
+        },
+        "freshness": {
+            "last_file_indexed": last_file_indexed,
+            "last_doc_indexed": last_doc_indexed,
+        },
+        "embedding": {
+            "model": embedding_model[0] if embedding_model else None,
+            "dimension": int(embedding_dim[0]) if embedding_dim else None,
+        },
+        "database": {
+            "size_mb": db_size_mb,
+            "journal_mode": journal_mode,
+            "wal_exists": wal_exists,
+            "wal_size_mb": wal_size_mb,
+        },
+    }
```
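The new `get_index_stats` helper is plain sqlite3 plus `os.path`, so it is easy to exercise directly. A minimal usage sketch (not part of the diff), assuming the `db` module is importable and that `get_db()` opens `<project>/code_memory.db` as the server code further down does:

```python
# Hypothetical usage sketch, not from the package. Assumes code-memory's
# db module is on the path and its schema has been created by get_db().
import db as db_mod

project_dir = "/tmp/demo-project"  # hypothetical path
conn = db_mod.get_db(project_dir)

stats = db_mod.get_index_stats(conn, project_dir)
print(stats["indexed"])                   # False until something is indexed
print(stats["counts"]["symbols"])         # 0 on a fresh database
print(stats["database"]["journal_mode"])  # e.g. "wal"
```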
```diff
--- code_memory-1.0.17/logging_config.py
+++ code_memory-1.0.18/logging_config.py
@@ -153,9 +153,9 @@ class IndexingLogger:
     def __init__(self, indexer_type: str):
         self.indexer_type = indexer_type
         self.logger = get_logger("indexing")
-        self.
+        self.files_newly_indexed = 0
         self.items_indexed = 0
-        self.
+        self.files_unchanged = 0
         self.start_time: datetime | None = None
 
     def start(self, directory: str) -> None:
@@ -165,13 +165,13 @@ class IndexingLogger:
 
     def file_indexed(self, filepath: str, items: int = 1) -> None:
         """Log successful file indexing."""
-        self.
+        self.files_newly_indexed += 1
         self.items_indexed += items
         self.logger.debug(f"Indexed {self.indexer_type}: {filepath} ({items} items)")
 
     def file_skipped(self, filepath: str, reason: str) -> None:
         """Log skipped file."""
-        self.
+        self.files_unchanged += 1
         self.logger.debug(f"Skipped {self.indexer_type}: {filepath} ({reason})")
 
     def complete(self) -> None:
@@ -179,8 +179,8 @@ class IndexingLogger:
         duration_ms = (datetime.now() - self.start_time).total_seconds() * 1000 if self.start_time else 0
         self.logger.info(
             f"Completed {self.indexer_type} indexing: "
-            f"files={self.
-            f"
+            f"files={self.files_newly_indexed} items={self.items_indexed} "
+            f"unchanged={self.files_unchanged} duration={duration_ms:.1f}ms"
         )
 
     def error(self, filepath: str, error_msg: str) -> None:
```
```diff
--- code_memory-1.0.17/parser.py
+++ code_memory-1.0.18/parser.py
@@ -11,6 +11,7 @@ from __future__ import annotations
 
 import logging
 import os
+from concurrent.futures import ThreadPoolExecutor, as_completed
 from pathlib import Path
 from typing import Any
 
@@ -21,6 +22,9 @@ import db as db_mod
 
 logger = logging.getLogger(__name__)
 
+# Number of worker threads for parallel indexing (configurable via env)
+MAX_WORKERS = int(os.environ.get("CODE_MEMORY_MAX_WORKERS", "4"))
+
 # ── Directories to always skip (even without .gitignore) ───────────────
 _SKIP_DIRS = frozenset({
     ".venv", "venv", "__pycache__", ".git", "node_modules",
@@ -452,7 +456,11 @@ def index_file(filepath: str, db) -> dict:
 # ---------------------------------------------------------------------------
 
 def index_directory(dirpath: str, db, progress_callback=None) -> list[dict]:
-    """Recursively index all source files under *dirpath
+    """Recursively index all source files under *dirpath* using parallel processing.
+
+    Uses ThreadPoolExecutor for parallel file I/O and parsing, while keeping
+    embedding generation sequential (sentence-transformers releases GIL during
+    inference). Processes files in batches for embedding efficiency.
 
     Skips directories in ``_SKIP_DIRS``, files matching ``.gitignore`` patterns
     (including nested .gitignore files), and unchanged files. Indexes any file
@@ -476,7 +484,7 @@ def index_directory(dirpath: str, db, progress_callback=None) -> list[dict]:
     gitignore = GitignoreMatcher(dirpath)
     logger.debug("Initialized gitignore matcher for %s", dirpath)
 
-    # First pass:
+    # First pass: collect all files to index
    total_files = 0
    file_list = []
    for root, dirs, files in os.walk(dirpath, topdown=True):
@@ -494,69 +502,243 @@ def index_directory(dirpath: str, db, progress_callback=None) -> list[dict]:
             file_list.append(os.path.join(root, fname))
             total_files += 1
 
-
-
+    if not file_list:
+        return []
 
-
-
-
-
-        # Check for .gitignore in current directory and load it
-        if rel_root != ".":
-            gitignore.check_dir_for_gitignore(root, rel_root)
+    # Report initial phase
+    if progress_callback:
+        progress_callback(0, total_files, "Scanning files for changes...")
 
-
-
-
-
-            rel_path = os.path.join(rel_root, d) if rel_root != "." else d
-            if gitignore.should_skip(rel_path, is_dir=True):
-                return False
-            return True
+    # Phase 1: Parallel file freshness check and parsing
+    # Each worker returns parsed data (not yet stored to DB)
+    files_processed = 0
+    parsed_files: list[tuple[str, dict | None, Exception | None]] = []  # (filepath, parsed_data, error)
 
-
+    def _parse_file_task(fpath: str) -> tuple[str, dict | None, Exception | None]:
+        """Parse a single file and return extracted data (without DB writes)."""
+        try:
+            parsed = _parse_file_for_indexing(fpath, db)
+            return (fpath, parsed, None)
+        except Exception as e:
+            return (fpath, None, e)
 
-
-
-
-        if gitignore.should_skip(rel_path, is_dir=False):
-            continue
+    with ThreadPoolExecutor(max_workers=MAX_WORKERS) as executor:
+        # Submit all parsing tasks
+        future_to_path = {executor.submit(_parse_file_task, fpath): fpath for fpath in file_list}
 
-
-
-
-        if ext not in _SOURCE_EXTENSIONS and _load_language(ext) is None:
-            continue
+        for future in as_completed(future_to_path):
+            fpath, parsed_data, error = future.result()
+            parsed_files.append((fpath, parsed_data, error))
 
-        fpath = os.path.join(root, fname)
-        try:
-            result = index_file(fpath, db)
-            results.append(result)
-        except Exception:
-            logger.exception("Failed to index %s", fpath)
-            results.append({
-                "file": fpath,
-                "symbols_indexed": 0,
-                "references_indexed": 0,
-                "skipped": True,
-                "error": True,
-            })
-
-        # Report progress
             files_processed += 1
             if progress_callback:
-
+                fname = os.path.basename(fpath)
+                progress_callback(files_processed, total_files, f"Parsing: {fname}")
+
+    # Phase 2: Batch embedding generation (sequential, GIL released during inference)
+    if progress_callback:
+        progress_callback(total_files, total_files, "Generating embeddings...")
+
+    # Collect all texts that need embedding
+    embedding_batches: list[tuple[str, list[tuple]]] = []  # (filepath, [(embed_text, symbol_data), ...])
+
+    for fpath, parsed_data, error in parsed_files:
+        if error or parsed_data is None or parsed_data.get("skipped"):
+            continue
+
+        embed_inputs = []
+        for sym in parsed_data.get("symbols", []):
+            embed_input = f"{sym['kind']} {sym['name']}: {sym['source_text'][:1000]}"
+            embed_inputs.append((embed_input, sym))
+
+        if embed_inputs:
+            embedding_batches.append((fpath, embed_inputs, parsed_data))
+
+    # Generate embeddings in batch
+    all_embed_texts = []
+    for fpath, embed_inputs, _ in embedding_batches:
+        for embed_text, _ in embed_inputs:
+            all_embed_texts.append(embed_text)
+
+    all_embeddings = db_mod.embed_texts_batch(all_embed_texts, batch_size=64) if all_embed_texts else []
+
+    # Phase 3: Sequential DB writes (to avoid SQLite conflicts)
+    if progress_callback:
+        progress_callback(total_files, total_files, "Storing to database...")
+
+    embed_idx = 0
+    for fpath, parsed_data, error in parsed_files:
+        if error:
+            logger.exception("Failed to index %s", fpath)
+            results.append({
+                "file": fpath,
+                "symbols_indexed": 0,
+                "references_indexed": 0,
+                "skipped": True,
+                "error": True,
+            })
+            continue
+
+        if parsed_data is None or parsed_data.get("skipped"):
+            results.append({
+                "file": fpath,
+                "symbols_indexed": 0,
+                "references_indexed": 0,
+                "skipped": True,
+            })
+            continue
+
+        # Find embeddings for this file
+        file_result = _store_parsed_file(fpath, parsed_data, db, embedding_batches, all_embeddings, embed_idx)
+        embed_idx += len(parsed_data.get("symbols", []))
+        results.append(file_result)
 
     # Log performance summary
     total_elapsed = time.perf_counter() - total_start
     total_symbols = sum(r.get("symbols_indexed", 0) for r in results)
     total_refs = sum(r.get("references_indexed", 0) for r in results)
-
-
-
-
-
-
-
+    files_newly_indexed = sum(1 for r in results if not r.get("skipped"))
+    files_unchanged = sum(1 for r in results if r.get("skipped") and not r.get("error"))
+
+    if total_files > 0:
+        files_per_sec = total_files / total_elapsed if total_elapsed > 0 else 0
+        logger.info(
+            "Indexed %d files (%d unchanged) in %.2fs (%.1f files/s) - %d symbols, %d references",
+            files_newly_indexed, files_unchanged, total_elapsed, files_per_sec, total_symbols, total_refs
+        )
+    else:
+        logger.info(
+            "Indexed %d files (%d unchanged) in %.2fs - %d symbols, %d references",
+            files_newly_indexed, files_unchanged, total_elapsed, total_symbols, total_refs
+        )
 
     return results
+
+
+def _parse_file_for_indexing(filepath: str, db) -> dict | None:
+    """Parse a file and extract symbols/references without DB writes.
+
+    Returns parsed data structure or None if skipped.
+    """
+    filepath = os.path.abspath(filepath)
+    ext = os.path.splitext(filepath)[1].lower()
+
+    # Check freshness
+    mtime = os.path.getmtime(filepath)
+    row = db.execute(
+        "SELECT id, last_modified FROM files WHERE path = ?", (filepath,)
+    ).fetchone()
+
+    if row and row[1] >= mtime:
+        return {"skipped": True, "file_id": row[0]}
+
+    # Read file
+    source_bytes = Path(filepath).read_bytes()
+    source_text = source_bytes.decode("utf-8", errors="replace")
+
+    fhash = db_mod.file_hash(filepath)
+
+    result = {
+        "skipped": False,
+        "mtime": mtime,
+        "fhash": fhash,
+        "symbols": [],
+        "references": [],
+        "fallback": False,
+    }
+
+    # Try tree-sitter parsing
+    lang = _load_language(ext)
+
+    if lang is not None:
+        parser = Parser(lang)
+        tree = parser.parse(source_bytes)
+
+        # Extract symbols (flat list for batch processing)
+        raw_symbols = _extract_symbols(tree.root_node, source_bytes)
+        all_symbols: list[dict] = []
+
+        def _collect_symbols(sym_list):
+            for sym in sym_list:
+                all_symbols.append(sym)
+                if sym.get("children"):
+                    _collect_symbols(sym["children"])
+
+        _collect_symbols(raw_symbols)
+        result["symbols"] = all_symbols
+
+        # Extract references
+        refs = _extract_references(tree.root_node, source_bytes)
+        result["references"] = refs
+    else:
+        # Fallback: entire file as one symbol
+        basename = os.path.basename(filepath)
+        result["symbols"] = [{
+            "name": basename,
+            "kind": "file",
+            "line_start": 1,
+            "line_end": source_text.count("\n") + 1,
+            "source_text": source_text[:5000],
+            "parent_id": None,
+        }]
+        result["fallback"] = True
+
+    return result
+
+
+def _store_parsed_file(
+    filepath: str,
+    parsed_data: dict,
+    db,
+    embedding_batches: list,
+    all_embeddings: list,
+    start_embed_idx: int
+) -> dict:
+    """Store parsed file data to database with pre-computed embeddings."""
+    filepath = os.path.abspath(filepath)
+
+    # Upsert file record
+    file_id = db_mod.upsert_file(db, filepath, parsed_data["mtime"], parsed_data["fhash"])
+
+    # Delete stale data
+    db_mod.delete_file_data(db, file_id)
+
+    symbols_indexed = 0
+    references_indexed = 0
+
+    # Find embeddings for this file
+    file_embeddings = None
+    embed_offset = 0
+    for bfpath, embed_inputs, _ in embedding_batches:
+        if bfpath == filepath:
+            file_embeddings = all_embeddings[start_embed_idx + embed_offset:start_embed_idx + embed_offset + len(embed_inputs)]
+            break
+        embed_offset += len(embed_inputs)
+
+    # Store symbols with embeddings
+    if parsed_data.get("symbols") and file_embeddings:
+        with db_mod.transaction(db):
+            for i, sym in enumerate(parsed_data["symbols"]):
+                sym_id = db_mod.upsert_symbol(
+                    db, sym["name"], sym["kind"], file_id,
+                    sym["line_start"], sym["line_end"],
+                    sym.get("parent_id"), sym["source_text"],
+                    auto_commit=False
+                )
+                if i < len(file_embeddings):
+                    db_mod.upsert_embedding(db, sym_id, file_embeddings[i], auto_commit=False)
+                symbols_indexed += 1
+
+    # Store references
+    if parsed_data.get("references"):
+        with db_mod.transaction(db):
+            for ref in parsed_data["references"]:
+                db_mod.upsert_reference(db, ref["name"], file_id, ref["line"], auto_commit=False)
+                references_indexed += 1
+
+    return {
+        "file": filepath,
+        "symbols_indexed": symbols_indexed,
+        "references_indexed": references_indexed,
+        "skipped": False,
+    }
```
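The restructured `index_directory` above reduces to a fan-out/fan-in pipeline: parse files in parallel, embed all extracted texts in one batch, then write sequentially through a single SQLite connection. A self-contained sketch of that pattern, where `fake_parse` and `fake_embed` are hypothetical stand-ins for `_parse_file_for_indexing` and `db_mod.embed_texts_batch`:

```python
# Illustrative sketch only; none of these names come from the package.
from concurrent.futures import ThreadPoolExecutor, as_completed

MAX_WORKERS = 4  # mirrors CODE_MEMORY_MAX_WORKERS above

def fake_parse(path: str) -> dict:
    # Stand-in for _parse_file_for_indexing: pure parsing, no DB writes.
    return {"file": path, "texts": [f"symbol in {path}"]}

def fake_embed(texts: list[str]) -> list[list[float]]:
    # Stand-in for db_mod.embed_texts_batch: one big batch beats many small calls.
    return [[0.0, 0.0, 0.0] for _ in texts]

files = ["a.py", "b.py", "c.py"]

# Phase 1: parse in parallel, collecting results as they complete.
parsed = []
with ThreadPoolExecutor(max_workers=MAX_WORKERS) as pool:
    futures = {pool.submit(fake_parse, f): f for f in files}
    for fut in as_completed(futures):
        parsed.append(fut.result())

# Phase 2: one batched embedding pass over every collected text.
all_texts = [t for p in parsed for t in p["texts"]]
embeddings = fake_embed(all_texts)

# Phase 3: sequential writes (a single SQLite writer avoids lock contention).
for p, emb in zip(parsed, embeddings):
    print(p["file"], len(emb))
```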
```diff
--- code_memory-1.0.17/queries.py
+++ code_memory-1.0.18/queries.py
@@ -108,7 +108,8 @@ def hybrid_search(query: str, db, top_k: int = 10) -> list[dict]:
         top_k: Number of results to return.
 
     Returns:
-        A list of result dicts sorted by descending RRF score
+        A list of result dicts sorted by descending RRF score, including
+        match_reason, match_highlights, and confidence.
     """
     bm25_results = _bm25_search(query, db, top_k=50)
     vec_results = _vector_search(query, db, top_k=50)
@@ -116,6 +117,7 @@ def hybrid_search(query: str, db, top_k: int = 10) -> list[dict]:
     # Build RRF score map keyed by symbol_id
     scores: dict[int, float] = {}
     details: dict[int, dict] = {}
+    match_sources: dict[int, list[str]] = {}  # Track which search found each result
 
     for rank, r in enumerate(bm25_results, start=1):
         sid = r["symbol_id"]
@@ -128,6 +130,8 @@ def hybrid_search(query: str, db, top_k: int = 10) -> list[dict]:
             "line_end": r["line_end"],
             "source_text": r["source_text"],
         }
+        match_sources[sid] = match_sources.get(sid, [])
+        match_sources[sid].append("bm25")
 
     for rank, r in enumerate(vec_results, start=1):
         sid = r["symbol_id"]
@@ -141,14 +145,112 @@ def hybrid_search(query: str, db, top_k: int = 10) -> list[dict]:
             "line_end": r["line_end"],
             "source_text": r["source_text"],
         }
+        match_sources[sid] = match_sources.get(sid, [])
+        if "vector" not in match_sources[sid]:
+            match_sources[sid].append("vector")
 
     # Sort by descending RRF score
     ranked = sorted(scores.items(), key=lambda kv: kv[1], reverse=True)[:top_k]
 
-
-
-
-
+    # Build results with match metadata
+    results = []
+    for sid, score in ranked:
+        sources = match_sources.get(sid, [])
+        is_hybrid = len(sources) == 2
+
+        # Determine match reason
+        if is_hybrid:
+            match_reason = "hybrid (BM25 + semantic)"
+        elif "bm25" in sources:
+            match_reason = "keyword match (BM25)"
+        else:
+            match_reason = "semantic match (vector)"
+
+        # Calculate confidence (normalize RRF score to 0-1 range)
+        # Max possible RRF score for a single source is 1/61 ≈ 0.0164
+        # For hybrid it's 2/61 ≈ 0.0328. We normalize accordingly.
+        max_single_rrf = 1.0 / (_RRF_K + 1)  # ≈ 0.0164
+        max_hybrid_rrf = 2.0 * max_single_rrf  # ≈ 0.0328
+        if is_hybrid:
+            confidence = min(1.0, score / max_hybrid_rrf)
+        else:
+            confidence = min(1.0, (score / max_single_rrf) * 0.7)  # Cap single-source at 0.7
+
+        result = {
+            **details[sid],
+            "score": round(score, 6),
+            "match_reason": match_reason,
+            "confidence": round(confidence, 3),
+            "match_highlights": [],  # Will be populated below if BM25 match
+        }
+
+        # Get highlights for BM25 matches using FTS5 highlight function
+        if "bm25" in sources:
+            highlights = _get_bm25_highlights(query, details[sid]["source_text"], db)
+            result["match_highlights"] = highlights
+
+        results.append(result)
+
+    return results
+
+
+def _get_bm25_highlights(query: str, source_text: str, db) -> list[str]:
+    """Extract highlighted snippets using FTS5.
+
+    Returns up to 3 highlighted text snippets showing where the query matched.
+    """
+    if not source_text or not query:
+        return []
+
+    # Use FTS5 highlight function to get matched portions
+    safe_query = query.replace('"', '""')
+    try:
+        # Create a temporary FTS5 query to get highlights
+        # We use the snippet function which returns highlighted fragments
+        rows = db.execute(
+            """
+            SELECT snippet(symbols_fts, 1, '>>>', '<<<', '...', 20) as highlight
+            FROM symbols_fts
+            WHERE symbols_fts MATCH ?
+            LIMIT 3
+            """,
+            (safe_query,),
+        ).fetchall()
+
+        highlights = []
+        for row in rows:
+            if row[0] and row[0] not in ("...", ""):
+                # Clean up the highlight markers for readability
+                highlight = row[0].replace(">>>", "**").replace("<<<", "**")
+                if len(highlight) > 10:  # Only include meaningful highlights
+                    highlights.append(highlight)
+
+        return highlights[:3]  # Return at most 3 highlights
+    except Exception:
+        # Fallback: find query terms in source text
+        return _simple_highlights(query, source_text)
+
+
+def _simple_highlights(query: str, source_text: str) -> list[str]:
+    """Simple fallback highlight extraction when FTS5 isn't available."""
+    highlights = []
+    query_terms = query.lower().split()
+    lines = source_text.split("\n")
+
+    for line in lines[:20]:  # Check first 20 lines
+        line_lower = line.lower()
+        for term in query_terms:
+            if term in line_lower and len(line.strip()) > 10:
+                # Truncate long lines
+                snippet = line.strip()[:100]
+                if len(snippet) > 50:
+                    snippet = snippet[:97] + "..."
+                highlights.append(snippet)
+                break
+        if len(highlights) >= 3:
+            break
+
+    return highlights[:3]
 
 
 # ---------------------------------------------------------------------------
```
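The confidence figures above are easiest to check with concrete numbers. A worked example, assuming the conventional reciprocal-rank-fusion score `sum(1 / (k + rank))` with `_RRF_K = 60`, which matches the 1/61 and 2/61 values quoted in the diff's comments:

```python
# Worked example of the confidence normalization; _RRF_K = 60 is an
# assumption consistent with the comments in the hunk above.
_RRF_K = 60

def rrf(rank: int, k: int = _RRF_K) -> float:
    return 1.0 / (k + rank)

max_single = rrf(1)          # 1/61 ≈ 0.0164, best possible single-source score
max_hybrid = 2 * max_single  # 2/61 ≈ 0.0328, rank 1 in both BM25 and vector

# A symbol ranked 1st by BM25 and 3rd by vector search:
score = rrf(1) + rrf(3)                     # 1/61 + 1/63 ≈ 0.0323
hybrid_conf = min(1.0, score / max_hybrid)  # ≈ 0.984

# A BM25-only hit at rank 2, capped at 0.7 for single-source matches:
solo_conf = min(1.0, (rrf(2) / max_single) * 0.7)  # ≈ 0.689

print(round(hybrid_conf, 3), round(solo_conf, 3))  # 0.984 0.689
```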
```diff
--- code_memory-1.0.17/server.py
+++ code_memory-1.0.18/server.py
@@ -128,6 +128,45 @@ def check_index_status(directory: str) -> dict:
         }
 
 
+# ── Tool 0.5: get_index_stats ─────────────────────────────────────────────
+@mcp.tool()
+def get_index_stats(directory: str) -> dict:
+    """USE THIS TOOL to get comprehensive statistics about the code index.
+
+    This tool provides detailed metrics about the index health, including
+    file counts, symbol distributions, embedding model info, and database size.
+
+    TRIGGER - Call this tool when:
+    - You want to understand what's in the index
+    - Debugging search quality issues
+    - Checking index freshness or coverage
+    - Monitoring database size and health
+
+    Do NOT use this tool for:
+    - Checking if indexing is needed (use check_index_status)
+    - Searching for code (use search_code)
+
+    Args:
+        directory: Path to the project directory.
+
+    Returns:
+        Dictionary with:
+        - indexed: boolean - true if anything has been indexed
+        - counts: Symbol, file, chunk, and embedding counts
+        - distributions: Symbol kinds and file extensions
+        - freshness: Last indexed timestamps
+        - embedding: Model name and dimension
+        - database: Size, journal mode, and WAL status
+    """
+    with logging_config.ToolLogger("get_index_stats", directory=directory):
+        try:
+            database = db_mod.get_db(directory)
+            stats = db_mod.get_index_stats(database, directory)
+            return {"status": "ok", **stats}
+        except Exception as e:
+            return errors.format_error(e)
+
+
 # ── Tool 1: search_code ───────────────────────────────────────────────────
 @mcp.tool()
 def search_code(
@@ -294,6 +333,7 @@ async def index_codebase(directory: str, ctx: Context) -> dict:
     - Enables semantic search via vector embeddings
     - Builds cross-reference graphs for "find all usages" queries
     - Incremental indexing: unchanged files are automatically skipped
+    - PARALLEL PROCESSING: Uses thread pool for faster indexing
 
     Do NOT use this tool for:
     - Non-code files (images, binaries, data files)
@@ -306,6 +346,8 @@ async def index_codebase(directory: str, ctx: Context) -> dict:
     Returns:
         Summary with files_indexed, total_symbols, total_chunks, and details.
     """
+    import time
+
     with logging_config.ToolLogger("index_codebase", directory=directory) as log:
         try:
             # Validate directory
@@ -313,20 +355,38 @@ async def index_codebase(directory: str, ctx: Context) -> dict:
 
             database = db_mod.get_db(str(directory_path))
 
+            # Track timing for throughput calculation
+            start_time = time.perf_counter()
+
             # Report initial progress
             await ctx.report_progress(0, 100, "Starting indexing...")
 
             # Create progress callback that schedules progress updates on the event loop
             loop = asyncio.get_running_loop()
-            progress_state = {"current": 0, "total": 0, "phase": "
+            progress_state = {"current": 0, "total": 0, "phase": "scanning"}
 
             def sync_progress_callback(current: int, total: int, message: str):
-                """Sync callback that schedules async progress reporting."""
+                """Sync callback that schedules async progress reporting with throughput info."""
                 progress_state["current"] = current
                 progress_state["total"] = total
+
+                # Calculate throughput and ETA
+                elapsed = time.perf_counter() - start_time
+                if elapsed > 0 and current > 0:
+                    files_per_sec = current / elapsed
+                    if files_per_sec > 0 and total > current:
+                        remaining_files = total - current
+                        eta_seconds = remaining_files / files_per_sec
+                        eta_str = f", ETA: {int(eta_seconds)}s" if eta_seconds < 60 else f", ETA: {int(eta_seconds / 60)}m"
+                    else:
+                        eta_str = ""
+                    throughput_str = f" ({files_per_sec:.1f} files/s{eta_str})"
+                else:
+                    throughput_str = ""
+
                 # Schedule the async progress report on the event loop
                 asyncio.run_coroutine_threadsafe(
-                    ctx.report_progress(current, total, message),
+                    ctx.report_progress(current, total, f"{message}{throughput_str}"),
                     loop
                 )
 
@@ -334,7 +394,7 @@ async def index_codebase(directory: str, ctx: Context) -> dict:
             code_logger = logging_config.IndexingLogger("code")
             code_logger.start(str(directory_path))
 
-            await ctx.report_progress(0, 100, "Scanning code files...")
+            await ctx.report_progress(0, 100, "Phase 1/3: Scanning code files...")
 
             code_results = await asyncio.to_thread(
                 parser_mod.index_directory,
@@ -361,7 +421,7 @@ async def index_codebase(directory: str, ctx: Context) -> dict:
             code_file_count = len(code_results)
             doc_progress_offset = code_file_count
 
-            await ctx.report_progress(code_file_count, code_file_count, "Scanning documentation files...")
+            await ctx.report_progress(code_file_count, code_file_count, "Phase 2/3: Scanning documentation files...")
 
             doc_results = await asyncio.to_thread(
                 doc_parser_mod.index_doc_directory,
@@ -383,7 +443,7 @@ async def index_codebase(directory: str, ctx: Context) -> dict:
             doc_skipped = [r for r in doc_results if r.get("skipped")]
 
             # Extract docstrings from indexed code
-            await ctx.report_progress(0, 0, "Extracting docstrings...")
+            await ctx.report_progress(0, 0, "Phase 3/3: Extracting docstrings...")
             docstring_results = await asyncio.to_thread(
                 doc_parser_mod.extract_docstrings_from_code,
                 database
@@ -393,20 +453,36 @@ async def index_codebase(directory: str, ctx: Context) -> dict:
             total_chunks = sum(r.get("chunks_indexed", 0) for r in doc_indexed)
             log.set_result_count(total_symbols + total_chunks + len(docstring_results))
 
-
+            # Calculate final throughput
+            total_elapsed = time.perf_counter() - start_time
+            total_files = len(code_results) + len(doc_results)
+            files_per_sec = total_files / total_elapsed if total_elapsed > 0 else 0
+
+            await ctx.report_progress(100, 100, f"Indexing complete! ({files_per_sec:.1f} files/s)")
+
+            # Get total indexed counts from database for cumulative stats
+            total_code_files = database.execute("SELECT COUNT(*) FROM files").fetchone()[0]
+            total_doc_files = database.execute("SELECT COUNT(*) FROM doc_files").fetchone()[0]
 
             return {
                 "status": "ok",
                 "directory": str(directory_path),
+                "performance": {
+                    "total_time_seconds": round(total_elapsed, 2),
+                    "files_per_second": round(files_per_sec, 1),
+                    "total_files_processed": total_files,
+                },
                 "code": {
-                    "
-                    "
+                    "files_newly_indexed": len(indexed),
+                    "files_unchanged": len(skipped),
+                    "total_indexed_files": total_code_files,
                     "total_symbols": total_symbols,
                     "total_references": sum(r.get("references_indexed", 0) for r in indexed),
                 },
                 "documentation": {
-                    "
-                    "
+                    "files_newly_indexed": len(doc_indexed),
+                    "files_unchanged": len(doc_skipped),
+                    "total_indexed_files": total_doc_files,
                     "total_chunks": total_chunks,
                     "docstrings_extracted": len(docstring_results),
                 },
```
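One detail of the progress plumbing in `index_codebase` is worth isolating: the indexing runs in a worker thread via `asyncio.to_thread`, so the sync callback cannot await `ctx.report_progress` itself; it schedules the coroutine on the loop with `asyncio.run_coroutine_threadsafe`. A minimal runnable sketch of that bridge, with hypothetical names:

```python
# Illustrative sketch only; none of these names come from the package.
import asyncio
import time

async def report_progress(current: int, total: int, message: str) -> None:
    print(f"[{current}/{total}] {message}")

async def main() -> None:
    loop = asyncio.get_running_loop()
    start = time.perf_counter()

    def sync_callback(current: int, total: int, message: str) -> None:
        # Runs in the worker thread, so it must not await anything directly.
        elapsed = time.perf_counter() - start
        rate = current / elapsed if elapsed > 0 and current > 0 else 0.0
        suffix = f" ({rate:.1f} files/s)" if rate else ""
        asyncio.run_coroutine_threadsafe(
            report_progress(current, total, f"{message}{suffix}"), loop
        )

    def blocking_work() -> None:
        for i in range(1, 4):
            time.sleep(0.1)  # stand-in for parsing one file
            sync_callback(i, 3, f"Parsing file {i}")

    await asyncio.to_thread(blocking_work)
    await asyncio.sleep(0.1)  # let any still-pending progress reports flush

asyncio.run(main())
```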
```diff
--- code_memory-1.0.17/tests/test_logging.py
+++ code_memory-1.0.18/tests/test_logging.py
@@ -133,19 +133,19 @@ class TestToolLogger:
 class TestIndexingLogger:
     """Tests for IndexingLogger class."""
 
-    def
-    """Test that files
+    def test_tracks_files_newly_indexed(self):
+        """Test that files newly indexed are tracked."""
         idx_logger = logging_config.IndexingLogger("test")
         idx_logger.file_indexed("file1.py", 3)
         idx_logger.file_indexed("file2.py", 2)
-        assert idx_logger.
+        assert idx_logger.files_newly_indexed == 2
         assert idx_logger.items_indexed == 5
 
-    def
-    """Test that files
+    def test_tracks_files_unchanged(self):
+        """Test that files unchanged are tracked."""
         idx_logger = logging_config.IndexingLogger("test")
         idx_logger.file_skipped("file1.py", "unchanged")
-        assert idx_logger.
+        assert idx_logger.files_unchanged == 1
 
 
 class TestPreconfiguredLoggers:
```