code-memory 1.0.3.tar.gz → 1.0.5.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (33)
  1. {code_memory-1.0.3 → code_memory-1.0.5}/PKG-INFO +2 -1
  2. {code_memory-1.0.3 → code_memory-1.0.5}/db.py +143 -23
  3. {code_memory-1.0.3 → code_memory-1.0.5}/doc_parser.py +97 -65
  4. {code_memory-1.0.3 → code_memory-1.0.5}/git_search.py +4 -5
  5. {code_memory-1.0.3 → code_memory-1.0.5}/logging_config.py +24 -1
  6. {code_memory-1.0.3 → code_memory-1.0.5}/parser.py +67 -42
  7. code_memory-1.0.5/prompts/milestone_6.xml +756 -0
  8. {code_memory-1.0.3 → code_memory-1.0.5}/pyproject.toml +2 -1
  9. {code_memory-1.0.3 → code_memory-1.0.5}/queries.py +99 -2
  10. {code_memory-1.0.3 → code_memory-1.0.5}/server.py +55 -18
  11. {code_memory-1.0.3 → code_memory-1.0.5}/tests/test_errors.py +2 -4
  12. {code_memory-1.0.3 → code_memory-1.0.5}/tests/test_logging.py +1 -3
  13. {code_memory-1.0.3 → code_memory-1.0.5}/tests/test_tools.py +0 -3
  14. {code_memory-1.0.3 → code_memory-1.0.5}/uv.lock +71 -1
  15. {code_memory-1.0.3 → code_memory-1.0.5}/validation.py +0 -1
  16. {code_memory-1.0.3 → code_memory-1.0.5}/.github/workflows/ci.yml +0 -0
  17. {code_memory-1.0.3 → code_memory-1.0.5}/.github/workflows/publish.yml +0 -0
  18. {code_memory-1.0.3 → code_memory-1.0.5}/.gitignore +0 -0
  19. {code_memory-1.0.3 → code_memory-1.0.5}/.python-version +0 -0
  20. {code_memory-1.0.3 → code_memory-1.0.5}/CHANGELOG.md +0 -0
  21. {code_memory-1.0.3 → code_memory-1.0.5}/CONTRIBUTING.md +0 -0
  22. {code_memory-1.0.3 → code_memory-1.0.5}/LICENSE +0 -0
  23. {code_memory-1.0.3 → code_memory-1.0.5}/Makefile +0 -0
  24. {code_memory-1.0.3 → code_memory-1.0.5}/README.md +0 -0
  25. {code_memory-1.0.3 → code_memory-1.0.5}/errors.py +0 -0
  26. {code_memory-1.0.3 → code_memory-1.0.5}/prompts/milestone_1.xml +0 -0
  27. {code_memory-1.0.3 → code_memory-1.0.5}/prompts/milestone_2.xml +0 -0
  28. {code_memory-1.0.3 → code_memory-1.0.5}/prompts/milestone_3.xml +0 -0
  29. {code_memory-1.0.3 → code_memory-1.0.5}/prompts/milestone_4.xml +0 -0
  30. {code_memory-1.0.3 → code_memory-1.0.5}/prompts/milestone_5.xml +0 -0
  31. {code_memory-1.0.3 → code_memory-1.0.5}/tests/__init__.py +0 -0
  32. {code_memory-1.0.3 → code_memory-1.0.5}/tests/conftest.py +1 -1
  33. {code_memory-1.0.3 → code_memory-1.0.5}/tests/test_validation.py +1 -1
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: code-memory
3
- Version: 1.0.3
3
+ Version: 1.0.5
4
4
  Summary: A deterministic, high-precision code intelligence MCP server
5
5
  Project-URL: Homepage, https://github.com/kapillamba4/code-memory
6
6
  Project-URL: Documentation, https://github.com/kapillamba4/code-memory#readme
@@ -32,6 +32,7 @@ Requires-Dist: tree-sitter-ruby>=0.23.1
32
32
  Requires-Dist: tree-sitter-rust>=0.24.0
33
33
  Requires-Dist: tree-sitter-typescript>=0.23.2
34
34
  Requires-Dist: tree-sitter>=0.25.2
35
+ Requires-Dist: xxhash>=3.6.0
35
36
  Provides-Extra: dev
36
37
  Requires-Dist: mypy>=1.13.0; extra == 'dev'
37
38
  Requires-Dist: pytest-asyncio>=0.24.0; extra == 'dev'
@@ -11,16 +11,19 @@ All writes use upsert semantics so re-indexing is idempotent.
11
11
 
12
12
  from __future__ import annotations
13
13
 
14
- import hashlib
14
+ import logging
15
15
  import sqlite3
16
- from pathlib import Path
16
+ from contextlib import contextmanager
17
17
  from typing import TYPE_CHECKING
18
18
 
19
19
  import sqlite_vec
20
+ import xxhash
20
21
 
21
22
  if TYPE_CHECKING:
22
23
  pass
23
24
 
25
+ logger = logging.getLogger(__name__)
26
+
24
27
  # ---------------------------------------------------------------------------
25
28
  # Embedding model (lazy-loaded singleton)
26
29
  # ---------------------------------------------------------------------------
@@ -42,10 +45,80 @@ def get_embedding_model():
42
45
  def embed_text(text: str) -> list[float]:
43
46
  """Generate a 384-dim dense vector embedding for *text*."""
44
47
  model = get_embedding_model()
45
- vec = model.encode(text, normalize_embeddings=True)
48
+ vec = model.encode(text, normalize_embeddings=True, show_progress_bar=False)
46
49
  return vec.tolist()
47
50
 
48
51
 
52
+ def embed_texts_batch(texts: list[str], batch_size: int = 32) -> list[list[float]]:
53
+ """Generate embeddings for multiple texts at once.
54
+
55
+ This is significantly faster than calling embed_text() in a loop
56
+ because sentence-transformers is optimized for batch processing.
57
+
58
+ Args:
59
+ texts: List of text strings to embed.
60
+ batch_size: Number of texts to process per batch (default 32).
61
+
62
+ Returns:
63
+ List of embedding vectors (same order as input texts).
64
+ """
65
+ if not texts:
66
+ return []
67
+
68
+ model = get_embedding_model()
69
+
70
+ # Batch encode with normalization (same as single-text version)
71
+ vectors = model.encode(
72
+ texts,
73
+ batch_size=batch_size,
74
+ normalize_embeddings=True,
75
+ show_progress_bar=False,
76
+ convert_to_numpy=True,
77
+ )
78
+
79
+ return [v.tolist() for v in vectors]
80
+
81
+
82
+ def warmup_embedding_model() -> None:
83
+ """Pre-load and warm up the embedding model.
84
+
85
+ Call this at server startup to avoid cold-start latency on first search.
86
+ The warmup encodes a dummy string to initialize internal tensors.
87
+ """
88
+ model = get_embedding_model()
89
+ # Warmup encode to initialize lazy-loaded components
90
+ model.encode("warmup", normalize_embeddings=True, show_progress_bar=False)
91
+ logger.info("Embedding model warmed up")
92
+
93
+
94
+ # ---------------------------------------------------------------------------
95
+ # Transaction support
96
+ # ---------------------------------------------------------------------------
97
+
98
+
99
+ @contextmanager
100
+ def transaction(db: sqlite3.Connection):
101
+ """Context manager for explicit transaction control.
102
+
103
+ Disables autocommit, yields control, then commits on success.
104
+ On exception, rolls back automatically.
105
+
106
+ Example:
107
+ with transaction(db):
108
+ for item in items:
109
+ upsert_symbol(db, ..., auto_commit=False)
110
+ # Single commit here
111
+ """
112
+ # Disable autocommit by starting a transaction
113
+ db.execute("BEGIN")
114
+ try:
115
+ yield db
116
+ db.commit()
117
+ except Exception:
118
+ db.rollback()
119
+ raise
120
+
121
+
49
122
  # ---------------------------------------------------------------------------
50
123
  # Database initialisation
51
124
  # ---------------------------------------------------------------------------
@@ -210,17 +283,34 @@ def get_db(db_path: str = "code_memory.db") -> sqlite3.Connection:
210
283
 
211
284
 
212
285
  def file_hash(filepath: str) -> str:
213
- """Compute SHA-256 hex digest of a file's contents."""
214
- h = hashlib.sha256()
286
+ """Compute fast non-cryptographic hash of a file's contents.
287
+
288
+ Uses xxHash (xxh64) which is ~10x faster than SHA-256 while still
289
+ providing excellent collision resistance for change detection.
290
+
291
+ Args:
292
+ filepath: Path to the file to hash.
293
+
294
+ Returns:
295
+ Hexadecimal string representation of the 64-bit hash.
296
+ """
297
+ h = xxhash.xxh64()
215
298
  with open(filepath, "rb") as f:
216
- for chunk in iter(lambda: f.read(8192), b""):
299
+ # Read in 64KB chunks for memory efficiency
300
+ for chunk in iter(lambda: f.read(65536), b""):
217
301
  h.update(chunk)
218
302
  return h.hexdigest()
219
303
 
220
304
 
221
- def upsert_file(db: sqlite3.Connection, path: str, last_modified: float, fhash: str) -> int:
305
+ def upsert_file(
306
+ db: sqlite3.Connection,
307
+ path: str,
308
+ last_modified: float,
309
+ fhash: str,
310
+ auto_commit: bool = True,
311
+ ) -> int:
222
312
  """Insert or update a file record. Returns the file_id."""
223
- cur = db.execute(
313
+ db.execute(
224
314
  """
225
315
  INSERT INTO files (path, last_modified, file_hash)
226
316
  VALUES (?, ?, ?)
@@ -230,13 +320,14 @@ def upsert_file(db: sqlite3.Connection, path: str, last_modified: float, fhash:
230
320
  """,
231
321
  (path, last_modified, fhash),
232
322
  )
233
- db.commit()
323
+ if auto_commit:
324
+ db.commit()
234
325
  # Fetch the id (needed because last_insert_rowid isn't reliable on update)
235
326
  row = db.execute("SELECT id FROM files WHERE path = ?", (path,)).fetchone()
236
327
  return row[0]
237
328
 
238
329
 
239
- def delete_file_data(db: sqlite3.Connection, file_id: int) -> None:
330
+ def delete_file_data(db: sqlite3.Connection, file_id: int, auto_commit: bool = True) -> None:
240
331
  """Remove all symbols, embeddings, and references for a file.
241
332
 
242
333
  This is called before re-indexing to guarantee idempotency.
@@ -251,7 +342,8 @@ def delete_file_data(db: sqlite3.Connection, file_id: int) -> None:
251
342
 
252
343
  db.execute("DELETE FROM symbols WHERE file_id = ?", (file_id,))
253
344
  db.execute("DELETE FROM references_ WHERE file_id = ?", (file_id,))
254
- db.commit()
345
+ if auto_commit:
346
+ db.commit()
255
347
 
256
348
 
257
349
  def upsert_symbol(
@@ -263,6 +355,7 @@ def upsert_symbol(
263
355
  line_end: int,
264
356
  parent_symbol_id: int | None,
265
357
  source_text: str,
358
+ auto_commit: bool = True,
266
359
  ) -> int:
267
360
  """Insert or update a symbol record. Returns the symbol_id."""
268
361
  db.execute(
@@ -277,7 +370,8 @@ def upsert_symbol(
277
370
  """,
278
371
  (name, kind, file_id, line_start, line_end, parent_symbol_id, source_text),
279
372
  )
280
- db.commit()
373
+ if auto_commit:
374
+ db.commit()
281
375
  row = db.execute(
282
376
  "SELECT id FROM symbols WHERE file_id = ? AND name = ? AND kind = ? AND line_start = ?",
283
377
  (file_id, name, kind, line_start),
@@ -286,7 +380,11 @@ def upsert_symbol(
286
380
 
287
381
 
288
382
  def upsert_reference(
289
- db: sqlite3.Connection, symbol_name: str, file_id: int, line_number: int
383
+ db: sqlite3.Connection,
384
+ symbol_name: str,
385
+ file_id: int,
386
+ line_number: int,
387
+ auto_commit: bool = True,
290
388
  ) -> None:
291
389
  """Insert or update a cross-reference record."""
292
390
  db.execute(
@@ -297,10 +395,16 @@ def upsert_reference(
297
395
  """,
298
396
  (symbol_name, file_id, line_number),
299
397
  )
300
- db.commit()
398
+ if auto_commit:
399
+ db.commit()
301
400
 
302
401
 
303
- def upsert_embedding(db: sqlite3.Connection, symbol_id: int, embedding: list[float]) -> None:
402
+ def upsert_embedding(
403
+ db: sqlite3.Connection,
404
+ symbol_id: int,
405
+ embedding: list[float],
406
+ auto_commit: bool = True,
407
+ ) -> None:
304
408
  """Insert or replace a symbol's dense vector embedding."""
305
409
  import struct
306
410
 
@@ -311,7 +415,8 @@ def upsert_embedding(db: sqlite3.Connection, symbol_id: int, embedding: list[flo
311
415
  "INSERT INTO symbol_embeddings (symbol_id, embedding) VALUES (?, ?)",
312
416
  (symbol_id, blob),
313
417
  )
314
- db.commit()
418
+ if auto_commit:
419
+ db.commit()
315
420
 
316
421
 
317
422
  # ---------------------------------------------------------------------------
@@ -320,7 +425,12 @@ def upsert_embedding(db: sqlite3.Connection, symbol_id: int, embedding: list[flo
320
425
 
321
426
 
322
427
  def upsert_doc_file(
323
- db: sqlite3.Connection, path: str, last_modified: float, fhash: str, doc_type: str
428
+ db: sqlite3.Connection,
429
+ path: str,
430
+ last_modified: float,
431
+ fhash: str,
432
+ doc_type: str,
433
+ auto_commit: bool = True,
324
434
  ) -> int:
325
435
  """Insert or update a documentation file record. Returns doc_file_id."""
326
436
  db.execute(
@@ -334,12 +444,13 @@ def upsert_doc_file(
334
444
  """,
335
445
  (path, last_modified, fhash, doc_type),
336
446
  )
337
- db.commit()
447
+ if auto_commit:
448
+ db.commit()
338
449
  row = db.execute("SELECT id FROM doc_files WHERE path = ?", (path,)).fetchone()
339
450
  return row[0]
340
451
 
341
452
 
342
- def delete_doc_file_data(db: sqlite3.Connection, doc_file_id: int) -> None:
453
+ def delete_doc_file_data(db: sqlite3.Connection, doc_file_id: int, auto_commit: bool = True) -> None:
343
454
  """Remove all chunks and embeddings for a documentation file.
344
455
 
345
456
  This is called before re-indexing to guarantee idempotency.
@@ -356,7 +467,8 @@ def delete_doc_file_data(db: sqlite3.Connection, doc_file_id: int) -> None:
356
467
  db.execute(f"DELETE FROM doc_embeddings WHERE chunk_id IN ({placeholders})", chunk_ids)
357
468
 
358
469
  db.execute("DELETE FROM doc_chunks WHERE doc_file_id = ?", (doc_file_id,))
359
- db.commit()
470
+ if auto_commit:
471
+ db.commit()
360
472
 
361
473
 
362
474
  def upsert_doc_chunk(
@@ -367,6 +479,7 @@ def upsert_doc_chunk(
367
479
  content: str,
368
480
  line_start: int,
369
481
  line_end: int,
482
+ auto_commit: bool = True,
370
483
  ) -> int:
371
484
  """Insert or update a documentation chunk. Returns chunk_id."""
372
485
  db.execute(
@@ -382,7 +495,8 @@ def upsert_doc_chunk(
382
495
  """,
383
496
  (doc_file_id, chunk_index, section_title, content, line_start, line_end),
384
497
  )
385
- db.commit()
498
+ if auto_commit:
499
+ db.commit()
386
500
  row = db.execute(
387
501
  "SELECT id FROM doc_chunks WHERE doc_file_id = ? AND chunk_index = ?",
388
502
  (doc_file_id, chunk_index),
@@ -390,7 +504,12 @@ def upsert_doc_chunk(
390
504
  return row[0]
391
505
 
392
506
 
393
- def upsert_doc_embedding(db: sqlite3.Connection, chunk_id: int, embedding: list[float]) -> None:
507
+ def upsert_doc_embedding(
508
+ db: sqlite3.Connection,
509
+ chunk_id: int,
510
+ embedding: list[float],
511
+ auto_commit: bool = True,
512
+ ) -> None:
394
513
  """Insert or replace a documentation chunk's dense vector embedding."""
395
514
  import struct
396
515
 
@@ -400,4 +519,5 @@ def upsert_doc_embedding(db: sqlite3.Connection, chunk_id: int, embedding: list[
400
519
  "INSERT INTO doc_embeddings (chunk_id, embedding) VALUES (?, ?)",
401
520
  (chunk_id, blob),
402
521
  )
403
- db.commit()
522
+ if auto_commit:
523
+ db.commit()
@@ -7,10 +7,8 @@ and indexes them for hybrid retrieval (BM25 + vector search).
7
7
 
8
8
  from __future__ import annotations
9
9
 
10
- import hashlib
11
10
  import os
12
11
  import re
13
- from pathlib import Path
14
12
 
15
13
  from markdown_it import MarkdownIt
16
14
 
@@ -239,7 +237,7 @@ def index_doc_file(
239
237
  overlap: int = DEFAULT_OVERLAP,
240
238
  min_chunk_size: int = DEFAULT_MIN_CHUNK_SIZE,
241
239
  ) -> dict:
242
- """Index a documentation file.
240
+ """Index a documentation file with batch embeddings and transaction.
243
241
 
244
242
  Args:
245
243
  filepath: Path to the documentation file.
@@ -259,7 +257,7 @@ def index_doc_file(
259
257
  # Check if file has changed
260
258
  stat = os.stat(abs_path)
261
259
  last_modified = stat.st_mtime
262
- fhash = db_mod.file_hash(abs_path)
260
+ fhash = db_mod.file_hash(abs_path) # Now uses xxHash
263
261
 
264
262
  existing = db.execute(
265
263
  "SELECT id, file_hash FROM doc_files WHERE path = ?", (abs_path,)
@@ -285,8 +283,9 @@ def index_doc_file(
285
283
  # Parse and chunk
286
284
  sections = parse_markdown_sections(abs_path)
287
285
 
288
- chunks_indexed = 0
289
- chunk_index = 0
286
+ # === BATCH PROCESSING ===
287
+ chunks_to_store: list[dict] = []
288
+ embed_inputs: list[str] = []
290
289
 
291
290
  for section in sections:
292
291
  content = section["content"]
@@ -300,22 +299,34 @@ def index_doc_file(
300
299
  if len(sub_content) < min_chunk_size:
301
300
  continue
302
301
 
303
- chunk_id = db_mod.upsert_doc_chunk(
304
- db,
305
- doc_file_id,
306
- chunk_index,
307
- section["section_title"],
308
- sub_content,
309
- section["line_start"],
310
- section["line_end"],
311
- )
312
-
313
- # Generate and store embedding
314
- embedding = db_mod.embed_text(f"{section['section_title'] or ''}: {sub_content}")
315
- db_mod.upsert_doc_embedding(db, chunk_id, embedding)
302
+ chunks_to_store.append({
303
+ "section_title": section["section_title"],
304
+ "content": sub_content,
305
+ "line_start": section["line_start"],
306
+ "line_end": section["line_end"],
307
+ })
308
+ embed_input = f"{section['section_title'] or ''}: {sub_content}"
309
+ embed_inputs.append(embed_input)
316
310
 
317
- chunk_index += 1
318
- chunks_indexed += 1
311
+ # Batch embed all chunks
312
+ chunks_indexed = 0
313
+ if embed_inputs:
314
+ embeddings = db_mod.embed_texts_batch(embed_inputs, batch_size=64)
315
+
316
+ with db_mod.transaction(db):
317
+ for i, chunk in enumerate(chunks_to_store):
318
+ chunk_id = db_mod.upsert_doc_chunk(
319
+ db,
320
+ doc_file_id,
321
+ i, # chunk_index
322
+ chunk["section_title"],
323
+ chunk["content"],
324
+ chunk["line_start"],
325
+ chunk["line_end"],
326
+ auto_commit=False,
327
+ )
328
+ db_mod.upsert_doc_embedding(db, chunk_id, embeddings[i], auto_commit=False)
329
+ chunks_indexed += 1
319
330
 
320
331
  return {
321
332
  "file": filepath,
@@ -356,8 +367,7 @@ def index_doc_directory(dirpath: str, db) -> list[dict]:
356
367
  def extract_docstrings_from_code(db) -> list[dict]:
357
368
  """Extract docstrings from already-indexed code symbols.
358
369
 
359
- This function queries the existing symbols table and extracts
360
- docstrings from the source_text field.
370
+ Uses batch embedding generation for better performance.
361
371
 
362
372
  Args:
363
373
  db: Database connection.
@@ -377,6 +387,10 @@ def extract_docstrings_from_code(db) -> list[dict]:
377
387
  """
378
388
  ).fetchall()
379
389
 
390
+ # === BATCH PROCESSING ===
391
+ docstrings_to_store: list[dict] = []
392
+ embed_inputs: list[str] = []
393
+
380
394
  for row in rows:
381
395
  symbol_id, name, kind, file_path, line_start, line_end, source_text = row
382
396
 
@@ -398,50 +412,68 @@ def extract_docstrings_from_code(db) -> list[dict]:
398
412
  if existing:
399
413
  continue
400
414
 
401
- # Create a doc_file entry for the code file if needed
402
- doc_file = db.execute(
403
- "SELECT id FROM doc_files WHERE path = ?", (file_path,)
404
- ).fetchone()
405
-
406
- if not doc_file:
407
- # Get file stats
408
- stat = os.stat(file_path) if os.path.exists(file_path) else None
409
- doc_file_id = db_mod.upsert_doc_file(
410
- db,
411
- file_path,
412
- stat.st_mtime if stat else 0,
413
- db_mod.file_hash(file_path) if stat else "",
414
- "docstring",
415
- )
416
- else:
417
- doc_file_id = doc_file[0]
418
-
419
- # Get next chunk index
420
- max_idx = db.execute(
421
- "SELECT COALESCE(MAX(chunk_index), -1) FROM doc_chunks WHERE doc_file_id = ?",
422
- (doc_file_id,),
423
- ).fetchone()[0]
424
-
425
- chunk_id = db_mod.upsert_doc_chunk(
426
- db,
427
- doc_file_id,
428
- max_idx + 1,
429
- name, # Use symbol name as section title
430
- docstring,
431
- line_start,
432
- line_end,
433
- )
434
-
435
- # Generate and store embedding
436
- embedding = db_mod.embed_text(f"{kind} {name}: {docstring}")
437
- db_mod.upsert_doc_embedding(db, chunk_id, embedding)
438
-
439
- results.append({
440
- "symbol": name,
415
+ docstrings_to_store.append({
416
+ "name": name,
441
417
  "kind": kind,
442
- "file": file_path,
443
- "docstring_length": len(docstring),
418
+ "file_path": file_path,
419
+ "line_start": line_start,
420
+ "line_end": line_end,
421
+ "docstring": docstring,
444
422
  })
423
+ embed_inputs.append(f"{kind} {name}: {docstring}")
424
+
425
+ # Batch embed all docstrings
426
+ if embed_inputs:
427
+ embeddings = db_mod.embed_texts_batch(embed_inputs, batch_size=64)
428
+
429
+ with db_mod.transaction(db):
430
+ for i, doc_info in enumerate(docstrings_to_store):
431
+ file_path = doc_info["file_path"]
432
+
433
+ # Create a doc_file entry for the code file if needed
434
+ doc_file = db.execute(
435
+ "SELECT id FROM doc_files WHERE path = ?", (file_path,)
436
+ ).fetchone()
437
+
438
+ if not doc_file:
439
+ # Get file stats
440
+ stat = os.stat(file_path) if os.path.exists(file_path) else None
441
+ doc_file_id = db_mod.upsert_doc_file(
442
+ db,
443
+ file_path,
444
+ stat.st_mtime if stat else 0,
445
+ db_mod.file_hash(file_path) if stat else "",
446
+ "docstring",
447
+ auto_commit=False,
448
+ )
449
+ else:
450
+ doc_file_id = doc_file[0]
451
+
452
+ # Get next chunk index
453
+ max_idx = db.execute(
454
+ "SELECT COALESCE(MAX(chunk_index), -1) FROM doc_chunks WHERE doc_file_id = ?",
455
+ (doc_file_id,),
456
+ ).fetchone()[0]
457
+
458
+ chunk_id = db_mod.upsert_doc_chunk(
459
+ db,
460
+ doc_file_id,
461
+ max_idx + 1,
462
+ doc_info["name"], # Use symbol name as section title
463
+ doc_info["docstring"],
464
+ doc_info["line_start"],
465
+ doc_info["line_end"],
466
+ auto_commit=False,
467
+ )
468
+
469
+ db_mod.upsert_doc_embedding(db, chunk_id, embeddings[i], auto_commit=False)
470
+
471
+ results.append({
472
+ "symbol": doc_info["name"],
473
+ "kind": doc_info["kind"],
474
+ "file": file_path,
475
+ "docstring_length": len(doc_info["docstring"]),
476
+ })
445
477
 
446
478
  return results
447
479
 
@@ -15,14 +15,13 @@ Design rules
15
15
 
16
16
  from __future__ import annotations
17
17
 
18
- from datetime import datetime, timezone
18
+ from datetime import UTC, datetime
19
19
  from pathlib import Path
20
20
  from typing import Any
21
21
 
22
22
  import git
23
23
  from git.exc import InvalidGitRepositoryError, NoSuchPathError
24
24
 
25
-
26
25
  # ---------------------------------------------------------------------------
27
26
  # Helpers
28
27
  # ---------------------------------------------------------------------------
@@ -34,7 +33,7 @@ def _commit_to_dict(commit: git.Commit, *, include_files_changed_count: bool = F
34
33
  include_files_changed_count: If True, compute the number of files
35
34
  changed (triggers a diff — slow for bulk iteration).
36
35
  """
37
- dt = datetime.fromtimestamp(commit.committed_date, tz=timezone.utc)
36
+ dt = datetime.fromtimestamp(commit.committed_date, tz=UTC)
38
37
  result: dict[str, Any] = {
39
38
  "hash": commit.hexsha[:7],
40
39
  "full_hash": commit.hexsha,
@@ -143,7 +142,7 @@ def get_commit_detail(
143
142
  return {"error": f"Could not resolve commit '{commit_hash}': {exc}"}
144
143
 
145
144
  try:
146
- dt = datetime.fromtimestamp(commit.committed_date, tz=timezone.utc)
145
+ dt = datetime.fromtimestamp(commit.committed_date, tz=UTC)
147
146
 
148
147
  parent_hashes = [p.hexsha[:7] for p in commit.parents]
149
148
 
@@ -271,7 +270,7 @@ def get_blame(
271
270
  "full_hash": commit.hexsha,
272
271
  "author": str(commit.author),
273
272
  "date": datetime.fromtimestamp(
274
- commit.committed_date, tz=timezone.utc
273
+ commit.committed_date, tz=UTC
275
274
  ).isoformat(),
276
275
  "line_content": line_text,
277
276
  "commit_message": commit.message.strip().split("\n")[0],
@@ -10,6 +10,8 @@ from __future__ import annotations
10
10
  import logging
11
11
  import os
12
12
  import sys
13
+ import time
14
+ from contextlib import contextmanager
13
15
  from datetime import datetime
14
16
  from typing import TextIO
15
17
 
@@ -24,6 +26,27 @@ DATE_FORMAT = "%Y-%m-%d %H:%M:%S"
24
26
  _initialized = False
25
27
 
26
28
 
29
+ @contextmanager
30
+ def log_timing(operation_name: str, logger: logging.Logger):
31
+ """Context manager to log operation timing.
32
+
33
+ Args:
34
+ operation_name: Name of the operation being timed.
35
+ logger: Logger instance to use for logging.
36
+
37
+ Example:
38
+ with log_timing("Indexing myfile.py", logger):
39
+ # ... indexing code ...
40
+ """
41
+ start = time.perf_counter()
42
+ logger.debug(f"{operation_name} started")
43
+ try:
44
+ yield
45
+ finally:
46
+ elapsed = time.perf_counter() - start
47
+ logger.info(f"{operation_name} completed in {elapsed:.2f}s")
48
+
49
+
27
50
  def setup_logging(level: str = LOG_LEVEL, stream: TextIO = sys.stderr) -> logging.Logger:
28
51
  """Configure structured logging for code-memory.
29
52
 
@@ -96,7 +119,7 @@ class ToolLogger:
96
119
  self.result_count: int | None = None
97
120
  self.error: str | None = None
98
121
 
99
- def __enter__(self) -> "ToolLogger":
122
+ def __enter__(self) -> ToolLogger:
100
123
  self.start_time = datetime.now()
101
124
  # Sanitize params for logging (don't log sensitive data)
102
125
  safe_params = {k: v for k, v in self.params.items() if v is not None}