claude-code-workflow 6.3.13 → 6.3.15

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (69):
  1. package/.claude/agents/issue-plan-agent.md +57 -103
  2. package/.claude/agents/issue-queue-agent.md +69 -120
  3. package/.claude/commands/issue/new.md +217 -473
  4. package/.claude/commands/issue/plan.md +76 -154
  5. package/.claude/commands/issue/queue.md +208 -259
  6. package/.claude/skills/issue-manage/SKILL.md +63 -22
  7. package/.claude/workflows/cli-templates/schemas/discovery-finding-schema.json +3 -3
  8. package/.claude/workflows/cli-templates/schemas/issues-jsonl-schema.json +3 -3
  9. package/.claude/workflows/cli-templates/schemas/queue-schema.json +0 -5
  10. package/.codex/prompts/issue-plan.md +16 -19
  11. package/.codex/prompts/issue-queue.md +0 -1
  12. package/README.md +1 -0
  13. package/ccw/dist/cli.d.ts.map +1 -1
  14. package/ccw/dist/cli.js +3 -1
  15. package/ccw/dist/cli.js.map +1 -1
  16. package/ccw/dist/commands/cli.d.ts.map +1 -1
  17. package/ccw/dist/commands/cli.js +45 -3
  18. package/ccw/dist/commands/cli.js.map +1 -1
  19. package/ccw/dist/commands/issue.d.ts +3 -1
  20. package/ccw/dist/commands/issue.d.ts.map +1 -1
  21. package/ccw/dist/commands/issue.js +383 -30
  22. package/ccw/dist/commands/issue.js.map +1 -1
  23. package/ccw/dist/core/routes/issue-routes.d.ts.map +1 -1
  24. package/ccw/dist/core/routes/issue-routes.js +77 -16
  25. package/ccw/dist/core/routes/issue-routes.js.map +1 -1
  26. package/ccw/dist/tools/cli-executor.d.ts.map +1 -1
  27. package/ccw/dist/tools/cli-executor.js +117 -4
  28. package/ccw/dist/tools/cli-executor.js.map +1 -1
  29. package/ccw/dist/tools/litellm-executor.d.ts +4 -0
  30. package/ccw/dist/tools/litellm-executor.d.ts.map +1 -1
  31. package/ccw/dist/tools/litellm-executor.js +54 -1
  32. package/ccw/dist/tools/litellm-executor.js.map +1 -1
  33. package/ccw/dist/tools/ui-generate-preview.d.ts +18 -0
  34. package/ccw/dist/tools/ui-generate-preview.d.ts.map +1 -1
  35. package/ccw/dist/tools/ui-generate-preview.js +26 -10
  36. package/ccw/dist/tools/ui-generate-preview.js.map +1 -1
  37. package/ccw/src/cli.ts +3 -1
  38. package/ccw/src/commands/cli.ts +47 -3
  39. package/ccw/src/commands/issue.ts +442 -34
  40. package/ccw/src/core/routes/issue-routes.ts +82 -16
  41. package/ccw/src/tools/cli-executor.ts +125 -4
  42. package/ccw/src/tools/litellm-executor.ts +107 -24
  43. package/ccw/src/tools/ui-generate-preview.js +60 -37
  44. package/codex-lens/src/codexlens/__pycache__/config.cpython-313.pyc +0 -0
  45. package/codex-lens/src/codexlens/__pycache__/entities.cpython-313.pyc +0 -0
  46. package/codex-lens/src/codexlens/config.py +25 -2
  47. package/codex-lens/src/codexlens/entities.py +5 -1
  48. package/codex-lens/src/codexlens/indexing/__pycache__/symbol_extractor.cpython-313.pyc +0 -0
  49. package/codex-lens/src/codexlens/indexing/symbol_extractor.py +243 -243
  50. package/codex-lens/src/codexlens/parsers/__pycache__/factory.cpython-313.pyc +0 -0
  51. package/codex-lens/src/codexlens/parsers/__pycache__/treesitter_parser.cpython-313.pyc +0 -0
  52. package/codex-lens/src/codexlens/parsers/factory.py +256 -256
  53. package/codex-lens/src/codexlens/parsers/treesitter_parser.py +335 -335
  54. package/codex-lens/src/codexlens/search/__pycache__/chain_search.cpython-313.pyc +0 -0
  55. package/codex-lens/src/codexlens/search/__pycache__/hybrid_search.cpython-313.pyc +0 -0
  56. package/codex-lens/src/codexlens/search/__pycache__/ranking.cpython-313.pyc +0 -0
  57. package/codex-lens/src/codexlens/search/chain_search.py +30 -1
  58. package/codex-lens/src/codexlens/semantic/__pycache__/__init__.cpython-313.pyc +0 -0
  59. package/codex-lens/src/codexlens/semantic/__pycache__/embedder.cpython-313.pyc +0 -0
  60. package/codex-lens/src/codexlens/semantic/__pycache__/reranker.cpython-313.pyc +0 -0
  61. package/codex-lens/src/codexlens/semantic/__pycache__/vector_store.cpython-313.pyc +0 -0
  62. package/codex-lens/src/codexlens/semantic/embedder.py +6 -9
  63. package/codex-lens/src/codexlens/semantic/vector_store.py +271 -200
  64. package/codex-lens/src/codexlens/storage/__pycache__/dir_index.cpython-313.pyc +0 -0
  65. package/codex-lens/src/codexlens/storage/__pycache__/index_tree.cpython-313.pyc +0 -0
  66. package/codex-lens/src/codexlens/storage/__pycache__/sqlite_store.cpython-313.pyc +0 -0
  67. package/codex-lens/src/codexlens/storage/sqlite_store.py +184 -108
  68. package/package.json +6 -1
  69. package/.claude/commands/issue/manage.md +0 -113
@@ -9,12 +9,13 @@ Optimized for high-performance similarity search using:
9
9
 
10
10
  from __future__ import annotations
11
11
 
12
- import json
13
- import logging
14
- import sqlite3
15
- import threading
16
- from pathlib import Path
17
- from typing import Any, Dict, List, Optional, Tuple
12
+ import json
13
+ import logging
14
+ import sys
15
+ import sqlite3
16
+ import threading
17
+ from pathlib import Path
18
+ from typing import Any, Dict, List, Optional, Tuple
18
19
 
19
20
  from codexlens.entities import SearchResult, SemanticChunk
20
21
  from codexlens.errors import StorageError
@@ -39,6 +40,34 @@ logger = logging.getLogger(__name__)
39
40
  # Epsilon used to guard against floating point precision edge cases (e.g., near-zero norms).
40
41
  EPSILON = 1e-10
41
42
 
43
+ # SQLite INTEGER PRIMARY KEY uses signed 64-bit rowids.
44
+ SQLITE_INTEGER_MAX = (1 << 63) - 1
45
+
46
+
47
+ def _validate_chunk_id_range(start_id: int, count: int) -> None:
48
+ """Validate that a batch insert can safely generate sequential chunk IDs."""
49
+ if count <= 0:
50
+ return
51
+
52
+ last_id = start_id + count - 1
53
+ if last_id > sys.maxsize or last_id > SQLITE_INTEGER_MAX:
54
+ raise ValueError(
55
+ "Chunk ID range overflow: "
56
+ f"start_id={start_id}, count={count} would allocate up to {last_id}, "
57
+ f"exceeding limits (sys.maxsize={sys.maxsize}, sqlite_max={SQLITE_INTEGER_MAX}). "
58
+ "Consider cleaning up the index database or creating a new index database."
59
+ )
60
+
61
+
62
+ def _validate_sql_placeholders(placeholders: str, expected_count: int) -> None:
63
+ """Validate the placeholder string used for a parameterized SQL IN clause."""
64
+ expected = ",".join("?" * expected_count)
65
+ if placeholders != expected:
66
+ raise ValueError(
67
+ "Invalid SQL placeholders for IN clause. "
68
+ f"Expected {expected_count} '?' placeholders."
69
+ )
70
+
42
71
 
43
72
  def _cosine_similarity(a: List[float], b: List[float]) -> float:
44
73
  """Compute cosine similarity between two vectors."""
@@ -443,11 +472,11 @@ class VectorStore:
443
472
  self._invalidate_cache()
444
473
  return ids
445
474
 
446
- def add_chunks_batch(
447
- self,
448
- chunks_with_paths: List[Tuple[SemanticChunk, str]],
449
- update_ann: bool = True,
450
- auto_save_ann: bool = True,
475
+ def add_chunks_batch(
476
+ self,
477
+ chunks_with_paths: List[Tuple[SemanticChunk, str]],
478
+ update_ann: bool = True,
479
+ auto_save_ann: bool = True,
451
480
  ) -> List[int]:
452
481
  """Batch insert chunks from multiple files in a single transaction.
453
482
 
@@ -459,16 +488,18 @@ class VectorStore:
459
488
  auto_save_ann: If True, save ANN index after update (default: True).
460
489
  Set to False for bulk inserts to reduce I/O overhead.
461
490
 
462
- Returns:
463
- List of inserted chunk IDs
464
- """
465
- if not chunks_with_paths:
466
- return []
467
-
468
- # Prepare batch data
469
- batch_data = []
470
- embeddings_list = []
471
- for chunk, file_path in chunks_with_paths:
491
+ Returns:
492
+ List of inserted chunk IDs
493
+ """
494
+ if not chunks_with_paths:
495
+ return []
496
+
497
+ batch_size = len(chunks_with_paths)
498
+
499
+ # Prepare batch data
500
+ batch_data = []
501
+ embeddings_list = []
502
+ for chunk, file_path in chunks_with_paths:
472
503
  if chunk.embedding is None:
473
504
  raise ValueError("All chunks must have embeddings")
474
505
  # Optimize: avoid repeated np.array() if already numpy
@@ -481,49 +512,51 @@ class VectorStore:
481
512
  batch_data.append((file_path, chunk.content, embedding_blob, metadata_json))
482
513
  embeddings_list.append(embedding_arr)
483
514
 
484
- # Batch insert to SQLite in single transaction
485
- with sqlite3.connect(self.db_path) as conn:
486
- # Get starting ID before insert
487
- row = conn.execute("SELECT MAX(id) FROM semantic_chunks").fetchone()
488
- start_id = (row[0] or 0) + 1
489
-
490
- conn.executemany(
491
- """
492
- INSERT INTO semantic_chunks (file_path, content, embedding, metadata)
493
- VALUES (?, ?, ?, ?)
515
+ # Batch insert to SQLite in single transaction
516
+ with sqlite3.connect(self.db_path) as conn:
517
+ # Get starting ID before insert
518
+ row = conn.execute("SELECT MAX(id) FROM semantic_chunks").fetchone()
519
+ start_id = (row[0] or 0) + 1
520
+
521
+ _validate_chunk_id_range(start_id, batch_size)
522
+
523
+ conn.executemany(
524
+ """
525
+ INSERT INTO semantic_chunks (file_path, content, embedding, metadata)
526
+ VALUES (?, ?, ?, ?)
494
527
  """,
495
528
  batch_data
496
- )
497
- conn.commit()
498
- # Calculate inserted IDs based on starting ID
499
- ids = list(range(start_id, start_id + len(chunks_with_paths)))
500
-
501
- # Handle ANN index updates
502
- if embeddings_list and update_ann and self._ensure_ann_index(len(embeddings_list[0])):
503
- # In bulk insert mode, accumulate for later batch update
504
- if self._bulk_insert_mode:
505
- self._bulk_insert_ids.extend(ids)
506
- self._bulk_insert_embeddings.extend(embeddings_list)
507
- else:
508
- # Normal mode: update immediately
509
- with self._ann_write_lock:
510
- try:
511
- embeddings_matrix = np.vstack(embeddings_list)
512
- self._ann_index.add_vectors(ids, embeddings_matrix)
513
- if auto_save_ann:
514
- self._ann_index.save()
515
- except Exception as e:
516
- logger.warning("Failed to add batch to ANN index: %s", e)
529
+ )
530
+ conn.commit()
531
+ # Calculate inserted IDs based on starting ID
532
+ ids = list(range(start_id, start_id + batch_size))
533
+
534
+ # Handle ANN index updates
535
+ if embeddings_list and update_ann and self._ensure_ann_index(len(embeddings_list[0])):
536
+ with self._ann_write_lock:
537
+ # In bulk insert mode, accumulate for later batch update
538
+ if self._bulk_insert_mode:
539
+ self._bulk_insert_ids.extend(ids)
540
+ self._bulk_insert_embeddings.extend(embeddings_list)
541
+ else:
542
+ # Normal mode: update immediately
543
+ try:
544
+ embeddings_matrix = np.vstack(embeddings_list)
545
+ self._ann_index.add_vectors(ids, embeddings_matrix)
546
+ if auto_save_ann:
547
+ self._ann_index.save()
548
+ except Exception as e:
549
+ logger.warning("Failed to add batch to ANN index: %s", e)
517
550
 
518
551
  # Invalidate cache after modification
519
552
  self._invalidate_cache()
520
553
  return ids
521
554
 
522
- def add_chunks_batch_numpy(
523
- self,
524
- chunks_with_paths: List[Tuple[SemanticChunk, str]],
525
- embeddings_matrix: np.ndarray,
526
- update_ann: bool = True,
555
+ def add_chunks_batch_numpy(
556
+ self,
557
+ chunks_with_paths: List[Tuple[SemanticChunk, str]],
558
+ embeddings_matrix: np.ndarray,
559
+ update_ann: bool = True,
527
560
  auto_save_ann: bool = True,
528
561
  ) -> List[int]:
529
562
  """Batch insert chunks with pre-computed numpy embeddings matrix.
@@ -537,16 +570,18 @@ class VectorStore:
537
570
  update_ann: If True, update ANN index with new vectors (default: True)
538
571
  auto_save_ann: If True, save ANN index after update (default: True)
539
572
 
540
- Returns:
541
- List of inserted chunk IDs
542
- """
543
- if not chunks_with_paths:
544
- return []
545
-
546
- if len(chunks_with_paths) != embeddings_matrix.shape[0]:
547
- raise ValueError(
548
- f"Mismatch: {len(chunks_with_paths)} chunks but "
549
- f"{embeddings_matrix.shape[0]} embeddings"
573
+ Returns:
574
+ List of inserted chunk IDs
575
+ """
576
+ if not chunks_with_paths:
577
+ return []
578
+
579
+ batch_size = len(chunks_with_paths)
580
+
581
+ if len(chunks_with_paths) != embeddings_matrix.shape[0]:
582
+ raise ValueError(
583
+ f"Mismatch: {len(chunks_with_paths)} chunks but "
584
+ f"{embeddings_matrix.shape[0]} embeddings"
550
585
  )
551
586
 
552
587
  # Ensure float32 format
@@ -560,45 +595,47 @@ class VectorStore:
560
595
  metadata_json = json.dumps(chunk.metadata) if chunk.metadata else None
561
596
  batch_data.append((file_path, chunk.content, embedding_blob, metadata_json))
562
597
 
563
- # Batch insert to SQLite in single transaction
564
- with sqlite3.connect(self.db_path) as conn:
565
- # Get starting ID before insert
566
- row = conn.execute("SELECT MAX(id) FROM semantic_chunks").fetchone()
567
- start_id = (row[0] or 0) + 1
568
-
569
- conn.executemany(
570
- """
571
- INSERT INTO semantic_chunks (file_path, content, embedding, metadata)
572
- VALUES (?, ?, ?, ?)
598
+ # Batch insert to SQLite in single transaction
599
+ with sqlite3.connect(self.db_path) as conn:
600
+ # Get starting ID before insert
601
+ row = conn.execute("SELECT MAX(id) FROM semantic_chunks").fetchone()
602
+ start_id = (row[0] or 0) + 1
603
+
604
+ _validate_chunk_id_range(start_id, batch_size)
605
+
606
+ conn.executemany(
607
+ """
608
+ INSERT INTO semantic_chunks (file_path, content, embedding, metadata)
609
+ VALUES (?, ?, ?, ?)
573
610
  """,
574
611
  batch_data
575
- )
576
- conn.commit()
577
- # Calculate inserted IDs based on starting ID
578
- ids = list(range(start_id, start_id + len(chunks_with_paths)))
579
-
580
- # Handle ANN index updates
581
- if update_ann and self._ensure_ann_index(embeddings_matrix.shape[1]):
582
- # In bulk insert mode, accumulate for later batch update
583
- if self._bulk_insert_mode:
584
- self._bulk_insert_ids.extend(ids)
585
- # Split matrix into individual arrays for accumulation
586
- self._bulk_insert_embeddings.extend([embeddings_matrix[i] for i in range(len(ids))])
587
- else:
588
- # Normal mode: update immediately
589
- with self._ann_write_lock:
590
- try:
591
- self._ann_index.add_vectors(ids, embeddings_matrix)
592
- if auto_save_ann:
593
- self._ann_index.save()
594
- except Exception as e:
595
- logger.warning("Failed to add batch to ANN index: %s", e)
612
+ )
613
+ conn.commit()
614
+ # Calculate inserted IDs based on starting ID
615
+ ids = list(range(start_id, start_id + batch_size))
616
+
617
+ # Handle ANN index updates
618
+ if update_ann and self._ensure_ann_index(embeddings_matrix.shape[1]):
619
+ with self._ann_write_lock:
620
+ # In bulk insert mode, accumulate for later batch update
621
+ if self._bulk_insert_mode:
622
+ self._bulk_insert_ids.extend(ids)
623
+ # Split matrix into individual arrays for accumulation
624
+ self._bulk_insert_embeddings.extend([embeddings_matrix[i] for i in range(len(ids))])
625
+ else:
626
+ # Normal mode: update immediately
627
+ try:
628
+ self._ann_index.add_vectors(ids, embeddings_matrix)
629
+ if auto_save_ann:
630
+ self._ann_index.save()
631
+ except Exception as e:
632
+ logger.warning("Failed to add batch to ANN index: %s", e)
596
633
 
597
634
  # Invalidate cache after modification
598
635
  self._invalidate_cache()
599
636
  return ids
600
637
 
601
- def begin_bulk_insert(self) -> None:
638
+ def begin_bulk_insert(self) -> None:
602
639
  """Begin bulk insert mode - disable ANN auto-update for better performance.
603
640
 
604
641
  Usage:
@@ -614,42 +651,45 @@ class VectorStore:
614
651
  for batch in batches:
615
652
  store.add_chunks_batch(batch)
616
653
  """
617
- self._bulk_insert_mode = True
618
- self._bulk_insert_ids.clear()
619
- self._bulk_insert_embeddings.clear()
620
- logger.debug("Entered bulk insert mode")
621
-
622
- def end_bulk_insert(self) -> None:
654
+ with self._ann_write_lock:
655
+ self._bulk_insert_mode = True
656
+ self._bulk_insert_ids.clear()
657
+ self._bulk_insert_embeddings.clear()
658
+ logger.debug("Entered bulk insert mode")
659
+
660
+ def end_bulk_insert(self) -> None:
623
661
  """End bulk insert mode and rebuild ANN index from accumulated data.
624
662
 
625
663
  This method should be called after all bulk inserts are complete to
626
664
  update the ANN index in a single batch operation.
627
665
  """
628
- if not self._bulk_insert_mode:
629
- logger.warning("end_bulk_insert called but not in bulk insert mode")
630
- return
631
-
632
- self._bulk_insert_mode = False
633
-
634
- # Update ANN index with all accumulated data
635
- if self._bulk_insert_ids and self._bulk_insert_embeddings:
636
- if self._ensure_ann_index(len(self._bulk_insert_embeddings[0])):
637
- with self._ann_write_lock:
638
- try:
639
- embeddings_matrix = np.vstack(self._bulk_insert_embeddings)
640
- self._ann_index.add_vectors(self._bulk_insert_ids, embeddings_matrix)
641
- self._ann_index.save()
642
- logger.info(
643
- "Bulk insert complete: added %d vectors to ANN index",
644
- len(self._bulk_insert_ids)
645
- )
646
- except Exception as e:
647
- logger.error("Failed to update ANN index after bulk insert: %s", e)
648
-
649
- # Clear accumulated data
650
- self._bulk_insert_ids.clear()
651
- self._bulk_insert_embeddings.clear()
652
- logger.debug("Exited bulk insert mode")
666
+ with self._ann_write_lock:
667
+ if not self._bulk_insert_mode:
668
+ logger.warning("end_bulk_insert called but not in bulk insert mode")
669
+ return
670
+
671
+ self._bulk_insert_mode = False
672
+ bulk_ids = list(self._bulk_insert_ids)
673
+ bulk_embeddings = list(self._bulk_insert_embeddings)
674
+ self._bulk_insert_ids.clear()
675
+ self._bulk_insert_embeddings.clear()
676
+
677
+ # Update ANN index with accumulated data.
678
+ if bulk_ids and bulk_embeddings:
679
+ if self._ensure_ann_index(len(bulk_embeddings[0])):
680
+ with self._ann_write_lock:
681
+ try:
682
+ embeddings_matrix = np.vstack(bulk_embeddings)
683
+ self._ann_index.add_vectors(bulk_ids, embeddings_matrix)
684
+ self._ann_index.save()
685
+ logger.info(
686
+ "Bulk insert complete: added %d vectors to ANN index",
687
+ len(bulk_ids),
688
+ )
689
+ except Exception as e:
690
+ logger.error("Failed to update ANN index after bulk insert: %s", e)
691
+
692
+ logger.debug("Exited bulk insert mode")
653
693
 
654
694
  class BulkInsertContext:
655
695
  """Context manager for bulk insert operations."""
@@ -712,34 +752,39 @@ class VectorStore:
712
752
  self._invalidate_cache()
713
753
  return deleted
714
754
 
715
- def search_similar(
716
- self,
717
- query_embedding: List[float],
718
- top_k: int = 10,
719
- min_score: float = 0.0,
720
- return_full_content: bool = True,
721
- ) -> List[SearchResult]:
722
- """Find chunks most similar to query embedding.
755
+ def search_similar(
756
+ self,
757
+ query_embedding: List[float],
758
+ top_k: int = 10,
759
+ min_score: float = 0.0,
760
+ return_full_content: bool = True,
761
+ ) -> List[SearchResult]:
762
+ """Find chunks most similar to query embedding.
723
763
 
724
764
  Uses HNSW index for O(log N) search when available, falls back to
725
765
  brute-force NumPy search otherwise.
726
766
 
727
- Args:
728
- query_embedding: Query vector.
729
- top_k: Maximum results to return.
730
- min_score: Minimum similarity score (0-1).
731
- return_full_content: If True, return full code block content.
732
-
733
- Returns:
734
- List of SearchResult ordered by similarity (highest first).
735
- """
736
- query_vec = np.array(query_embedding, dtype=np.float32)
767
+ Args:
768
+ query_embedding: Query vector.
769
+ top_k: Maximum results to return.
770
+ min_score: Minimum cosine similarity score in [0.0, 1.0].
771
+ return_full_content: If True, return full code block content.
737
772
 
738
- # Try HNSW search first (O(log N))
739
- if (
740
- HNSWLIB_AVAILABLE
741
- and self._ann_index is not None
742
- and self._ann_index.is_loaded
773
+ Returns:
774
+ List of SearchResult ordered by similarity (highest first).
775
+ """
776
+ query_vec = np.array(query_embedding, dtype=np.float32)
777
+
778
+ if not 0.0 <= min_score <= 1.0:
779
+ raise ValueError(
780
+ f"Invalid min_score: {min_score}. Must be within [0.0, 1.0] for cosine similarity."
781
+ )
782
+
783
+ # Try HNSW search first (O(log N))
784
+ if (
785
+ HNSWLIB_AVAILABLE
786
+ and self._ann_index is not None
787
+ and self._ann_index.is_loaded
743
788
  and self._ann_index.count() > 0
744
789
  ):
745
790
  try:
@@ -754,20 +799,20 @@ class VectorStore:
754
799
  query_vec, top_k, min_score, return_full_content
755
800
  )
756
801
 
757
- def _search_with_ann(
758
- self,
759
- query_vec: np.ndarray,
760
- top_k: int,
761
- min_score: float,
762
- return_full_content: bool,
763
- ) -> List[SearchResult]:
764
- """Search using HNSW index (O(log N)).
765
-
766
- Args:
767
- query_vec: Query vector as numpy array
768
- top_k: Maximum results to return
769
- min_score: Minimum similarity score (0-1)
770
- return_full_content: If True, return full code block content
802
+ def _search_with_ann(
803
+ self,
804
+ query_vec: np.ndarray,
805
+ top_k: int,
806
+ min_score: float,
807
+ return_full_content: bool,
808
+ ) -> List[SearchResult]:
809
+ """Search using HNSW index (O(log N)).
810
+
811
+ Args:
812
+ query_vec: Query vector as numpy array
813
+ top_k: Maximum results to return
814
+ min_score: Minimum cosine similarity score in [0.0, 1.0]
815
+ return_full_content: If True, return full code block content
771
816
 
772
817
  Returns:
773
818
  List of SearchResult ordered by similarity (highest first)
@@ -779,15 +824,36 @@ class VectorStore:
779
824
  if effective_top_k == 0:
780
825
  return []
781
826
 
782
- # HNSW search returns (ids, distances)
783
- # For cosine space: distance = 1 - similarity
784
- ids, distances = self._ann_index.search(query_vec, effective_top_k)
785
-
786
- if not ids:
787
- return []
788
-
789
- # Convert distances to similarity scores
790
- scores = [1.0 - d for d in distances]
827
+ # HNSW search returns (ids, distances)
828
+ # For cosine space: distance = 1 - similarity
829
+ ids, distances = self._ann_index.search(query_vec, effective_top_k)
830
+
831
+ if ids is None or distances is None:
832
+ logger.debug(
833
+ "ANN search returned null results (ids=%s, distances=%s)",
834
+ ids,
835
+ distances,
836
+ )
837
+ return []
838
+
839
+ if len(ids) == 0 or len(distances) == 0:
840
+ logger.debug(
841
+ "ANN search returned empty results (ids=%s, distances=%s)",
842
+ ids,
843
+ distances,
844
+ )
845
+ return []
846
+
847
+ if len(ids) != len(distances):
848
+ logger.warning(
849
+ "ANN search returned mismatched result lengths (%d ids, %d distances)",
850
+ len(ids),
851
+ len(distances),
852
+ )
853
+ return []
854
+
855
+ # Convert distances to similarity scores
856
+ scores = [1.0 - d for d in distances]
791
857
 
792
858
  # Filter by min_score
793
859
  filtered = [
@@ -805,20 +871,20 @@ class VectorStore:
805
871
  # Fetch content from SQLite
806
872
  return self._fetch_results_by_ids(top_ids, top_scores, return_full_content)
807
873
 
808
- def _search_brute_force(
809
- self,
810
- query_vec: np.ndarray,
811
- top_k: int,
812
- min_score: float,
813
- return_full_content: bool,
814
- ) -> List[SearchResult]:
815
- """Brute-force search using NumPy (O(N) fallback).
816
-
817
- Args:
818
- query_vec: Query vector as numpy array
819
- top_k: Maximum results to return
820
- min_score: Minimum similarity score (0-1)
821
- return_full_content: If True, return full code block content
874
+ def _search_brute_force(
875
+ self,
876
+ query_vec: np.ndarray,
877
+ top_k: int,
878
+ min_score: float,
879
+ return_full_content: bool,
880
+ ) -> List[SearchResult]:
881
+ """Brute-force search using NumPy (O(N) fallback).
882
+
883
+ Args:
884
+ query_vec: Query vector as numpy array
885
+ top_k: Maximum results to return
886
+ min_score: Minimum cosine similarity score in [0.0, 1.0]
887
+ return_full_content: If True, return full code block content
822
888
 
823
889
  Returns:
824
890
  List of SearchResult ordered by similarity (highest first)
@@ -885,16 +951,21 @@ class VectorStore:
885
951
  Returns:
886
952
  List of SearchResult objects.
887
953
  """
888
- if not chunk_ids:
889
- return []
890
-
891
- # Build parameterized query for IN clause
892
- placeholders = ",".join("?" * len(chunk_ids))
893
- query = f"""
894
- SELECT id, file_path, content, metadata
895
- FROM semantic_chunks
896
- WHERE id IN ({placeholders})
897
- """
954
+ if not chunk_ids:
955
+ return []
956
+
957
+ # Build parameterized query for IN clause
958
+ placeholders = ",".join("?" * len(chunk_ids))
959
+ _validate_sql_placeholders(placeholders, len(chunk_ids))
960
+
961
+ # SQL injection prevention:
962
+ # - Only a validated placeholders string (commas + '?') is interpolated into the query.
963
+ # - User-provided values are passed separately via sqlite3 parameters.
964
+ query = """
965
+ SELECT id, file_path, content, metadata
966
+ FROM semantic_chunks
967
+ WHERE id IN ({placeholders})
968
+ """.format(placeholders=placeholders)
898
969
 
899
970
  with sqlite3.connect(self.db_path) as conn:
900
971
  conn.execute("PRAGMA mmap_size = 30000000000")