claude-code-workflow 6.2.4 → 6.2.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (62)
  1. package/ccw/dist/core/lite-scanner-complete.d.ts.map +1 -1
  2. package/ccw/dist/core/lite-scanner-complete.js +4 -1
  3. package/ccw/dist/core/lite-scanner-complete.js.map +1 -1
  4. package/ccw/dist/core/lite-scanner.d.ts.map +1 -1
  5. package/ccw/dist/core/lite-scanner.js +4 -1
  6. package/ccw/dist/core/lite-scanner.js.map +1 -1
  7. package/ccw/dist/core/routes/claude-routes.d.ts.map +1 -1
  8. package/ccw/dist/core/routes/claude-routes.js +3 -5
  9. package/ccw/dist/core/routes/claude-routes.js.map +1 -1
  10. package/ccw/dist/core/routes/cli-routes.d.ts.map +1 -1
  11. package/ccw/dist/core/routes/cli-routes.js +2 -1
  12. package/ccw/dist/core/routes/cli-routes.js.map +1 -1
  13. package/ccw/dist/core/routes/codexlens-routes.d.ts.map +1 -1
  14. package/ccw/dist/core/routes/codexlens-routes.js +31 -6
  15. package/ccw/dist/core/routes/codexlens-routes.js.map +1 -1
  16. package/ccw/dist/core/routes/rules-routes.d.ts.map +1 -1
  17. package/ccw/dist/core/routes/rules-routes.js +4 -3
  18. package/ccw/dist/core/routes/rules-routes.js.map +1 -1
  19. package/ccw/dist/core/routes/skills-routes.d.ts.map +1 -1
  20. package/ccw/dist/core/routes/skills-routes.js +124 -6
  21. package/ccw/dist/core/routes/skills-routes.js.map +1 -1
  22. package/ccw/dist/tools/cli-executor.d.ts +4 -1
  23. package/ccw/dist/tools/cli-executor.d.ts.map +1 -1
  24. package/ccw/dist/tools/cli-executor.js +54 -2
  25. package/ccw/dist/tools/cli-executor.js.map +1 -1
  26. package/ccw/dist/tools/codex-lens.d.ts +20 -3
  27. package/ccw/dist/tools/codex-lens.d.ts.map +1 -1
  28. package/ccw/dist/tools/codex-lens.js +166 -37
  29. package/ccw/dist/tools/codex-lens.js.map +1 -1
  30. package/ccw/package.json +1 -1
  31. package/ccw/src/core/lite-scanner-complete.ts +5 -1
  32. package/ccw/src/core/lite-scanner.ts +5 -1
  33. package/ccw/src/core/routes/claude-routes.ts +3 -5
  34. package/ccw/src/core/routes/cli-routes.ts +2 -1
  35. package/ccw/src/core/routes/codexlens-routes.ts +34 -6
  36. package/ccw/src/core/routes/rules-routes.ts +4 -3
  37. package/ccw/src/core/routes/skills-routes.ts +144 -6
  38. package/ccw/src/templates/dashboard-js/components/mcp-manager.js +7 -12
  39. package/ccw/src/templates/dashboard-js/i18n.js +167 -5
  40. package/ccw/src/templates/dashboard-js/views/claude-manager.js +18 -4
  41. package/ccw/src/templates/dashboard-js/views/cli-manager.js +5 -3
  42. package/ccw/src/templates/dashboard-js/views/codexlens-manager.js +790 -25
  43. package/ccw/src/templates/dashboard-js/views/rules-manager.js +35 -6
  44. package/ccw/src/templates/dashboard-js/views/skills-manager.js +385 -21
  45. package/ccw/src/tools/cli-executor.ts +70 -2
  46. package/ccw/src/tools/codex-lens.ts +183 -35
  47. package/codex-lens/pyproject.toml +66 -48
  48. package/codex-lens/src/codexlens/__pycache__/config.cpython-313.pyc +0 -0
  49. package/codex-lens/src/codexlens/cli/__pycache__/embedding_manager.cpython-313.pyc +0 -0
  50. package/codex-lens/src/codexlens/cli/__pycache__/model_manager.cpython-313.pyc +0 -0
  51. package/codex-lens/src/codexlens/cli/embedding_manager.py +3 -3
  52. package/codex-lens/src/codexlens/cli/model_manager.py +24 -2
  53. package/codex-lens/src/codexlens/search/__pycache__/hybrid_search.cpython-313.pyc +0 -0
  54. package/codex-lens/src/codexlens/search/hybrid_search.py +313 -313
  55. package/codex-lens/src/codexlens/semantic/__init__.py +76 -39
  56. package/codex-lens/src/codexlens/semantic/__pycache__/__init__.cpython-313.pyc +0 -0
  57. package/codex-lens/src/codexlens/semantic/__pycache__/embedder.cpython-313.pyc +0 -0
  58. package/codex-lens/src/codexlens/semantic/__pycache__/gpu_support.cpython-313.pyc +0 -0
  59. package/codex-lens/src/codexlens/semantic/__pycache__/ollama_backend.cpython-313.pyc +0 -0
  60. package/codex-lens/src/codexlens/semantic/embedder.py +244 -185
  61. package/codex-lens/src/codexlens/semantic/gpu_support.py +192 -0
  62. package/package.json +1 -1
package/codex-lens/src/codexlens/semantic/__init__.py
@@ -1,39 +1,76 @@
- """Optional semantic search module for CodexLens.
-
- Install with: pip install codexlens[semantic]
- Uses fastembed (ONNX-based, lightweight ~200MB)
- """
-
- from __future__ import annotations
-
- SEMANTIC_AVAILABLE = False
- SEMANTIC_BACKEND: str | None = None
- _import_error: str | None = None
-
- def _detect_backend() -> tuple[bool, str | None, str | None]:
-     """Detect if fastembed is available."""
-     try:
-         import numpy as np
-     except ImportError as e:
-         return False, None, f"numpy not available: {e}"
-
-     try:
-         from fastembed import TextEmbedding
-         return True, "fastembed", None
-     except ImportError:
-         pass
-
-     return False, None, "fastembed not available. Install with: pip install codexlens[semantic]"
-
- # Initialize on module load
- SEMANTIC_AVAILABLE, SEMANTIC_BACKEND, _import_error = _detect_backend()
-
- def check_semantic_available() -> tuple[bool, str | None]:
-     """Check if semantic search dependencies are available."""
-     return SEMANTIC_AVAILABLE, _import_error
-
- __all__ = [
-     "SEMANTIC_AVAILABLE",
-     "SEMANTIC_BACKEND",
-     "check_semantic_available",
- ]
+ """Optional semantic search module for CodexLens.
+
+ Install with: pip install codexlens[semantic]
+ Uses fastembed (ONNX-based, lightweight ~200MB)
+
+ GPU Acceleration:
+ - Automatic GPU detection and usage when available
+ - Supports CUDA (NVIDIA), TensorRT, DirectML (Windows), ROCm (AMD), CoreML (Apple)
+ - Install GPU support: pip install onnxruntime-gpu (NVIDIA) or onnxruntime-directml (Windows)
+ """
+
+ from __future__ import annotations
+
+ SEMANTIC_AVAILABLE = False
+ SEMANTIC_BACKEND: str | None = None
+ GPU_AVAILABLE = False
+ _import_error: str | None = None
+
+
+ def _detect_backend() -> tuple[bool, str | None, bool, str | None]:
+     """Detect if fastembed and GPU are available."""
+     try:
+         import numpy as np
+     except ImportError as e:
+         return False, None, False, f"numpy not available: {e}"
+
+     try:
+         from fastembed import TextEmbedding
+     except ImportError:
+         return False, None, False, "fastembed not available. Install with: pip install codexlens[semantic]"
+
+     # Check GPU availability
+     gpu_available = False
+     try:
+         from .gpu_support import is_gpu_available
+         gpu_available = is_gpu_available()
+     except ImportError:
+         pass
+
+     return True, "fastembed", gpu_available, None
+
+
+ # Initialize on module load
+ SEMANTIC_AVAILABLE, SEMANTIC_BACKEND, GPU_AVAILABLE, _import_error = _detect_backend()
+
+
+ def check_semantic_available() -> tuple[bool, str | None]:
+     """Check if semantic search dependencies are available."""
+     return SEMANTIC_AVAILABLE, _import_error
+
+
+ def check_gpu_available() -> tuple[bool, str]:
+     """Check if GPU acceleration is available.
+
+     Returns:
+         Tuple of (is_available, status_message)
+     """
+     if not SEMANTIC_AVAILABLE:
+         return False, "Semantic search not available"
+
+     try:
+         from .gpu_support import is_gpu_available, get_gpu_summary
+         if is_gpu_available():
+             return True, get_gpu_summary()
+         return False, "No GPU detected (using CPU)"
+     except ImportError:
+         return False, "GPU support module not available"
+
+
+ __all__ = [
+     "SEMANTIC_AVAILABLE",
+     "SEMANTIC_BACKEND",
+     "GPU_AVAILABLE",
+     "check_semantic_available",
+     "check_gpu_available",
+ ]
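
For orientation, a minimal sketch of how downstream code might consume the new module-level checks. This example is not part of the published diff: the import path assumes the package is installed as codexlens, and the printed messages are illustrative only.

from codexlens.semantic import check_semantic_available, check_gpu_available

# Confirm optional dependencies before attempting semantic indexing
available, error = check_semantic_available()
if not available:
    print(f"Semantic search disabled: {error}")
else:
    gpu_ok, status = check_gpu_available()
    # status is either a GPU summary or an explanation of the CPU fallback
    print(status)
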
package/codex-lens/src/codexlens/semantic/embedder.py
@@ -1,185 +1,244 @@
- """Embedder for semantic code search using fastembed."""
-
- from __future__ import annotations
-
- import gc
- import threading
- from typing import Dict, Iterable, List, Optional
-
- import numpy as np
-
- from . import SEMANTIC_AVAILABLE
-
-
- # Global embedder cache for singleton pattern
- _embedder_cache: Dict[str, "Embedder"] = {}
- _cache_lock = threading.Lock()
-
-
- def get_embedder(profile: str = "code") -> "Embedder":
-     """Get or create a cached Embedder instance (thread-safe singleton).
-
-     This function provides significant performance improvement by reusing
-     Embedder instances across multiple searches, avoiding repeated model
-     loading overhead (~0.8s per load).
-
-     Args:
-         profile: Model profile ("fast", "code", "multilingual", "balanced")
-
-     Returns:
-         Cached Embedder instance for the given profile
-     """
-     global _embedder_cache
-
-     # Fast path: check cache without lock
-     if profile in _embedder_cache:
-         return _embedder_cache[profile]
-
-     # Slow path: acquire lock for initialization
-     with _cache_lock:
-         # Double-check after acquiring lock
-         if profile in _embedder_cache:
-             return _embedder_cache[profile]
-
-         # Create new embedder and cache it
-         embedder = Embedder(profile=profile)
-         # Pre-load model to ensure it's ready
-         embedder._load_model()
-         _embedder_cache[profile] = embedder
-         return embedder
-
-
- def clear_embedder_cache() -> None:
-     """Clear the embedder cache and release ONNX resources.
-
-     This method ensures proper cleanup of ONNX model resources to prevent
-     memory leaks when embedders are no longer needed.
-     """
-     global _embedder_cache
-     with _cache_lock:
-         # Release ONNX resources before clearing cache
-         for embedder in _embedder_cache.values():
-             if embedder._model is not None:
-                 del embedder._model
-                 embedder._model = None
-         _embedder_cache.clear()
-         gc.collect()
-
-
- class Embedder:
-     """Generate embeddings for code chunks using fastembed (ONNX-based).
-
-     Supported Model Profiles:
-     - fast: BAAI/bge-small-en-v1.5 (384 dim) - Fast, lightweight, English-optimized
-     - code: jinaai/jina-embeddings-v2-base-code (768 dim) - Code-optimized, best for programming languages
-     - multilingual: intfloat/multilingual-e5-large (1024 dim) - Multilingual + code support
-     - balanced: mixedbread-ai/mxbai-embed-large-v1 (1024 dim) - High accuracy, general purpose
-     """
-
-     # Model profiles for different use cases
-     MODELS = {
-         "fast": "BAAI/bge-small-en-v1.5",  # 384 dim - Fast, lightweight
-         "code": "jinaai/jina-embeddings-v2-base-code",  # 768 dim - Code-optimized
-         "multilingual": "intfloat/multilingual-e5-large",  # 1024 dim - Multilingual
-         "balanced": "mixedbread-ai/mxbai-embed-large-v1",  # 1024 dim - High accuracy
-     }
-
-     # Dimension mapping for each model
-     MODEL_DIMS = {
-         "BAAI/bge-small-en-v1.5": 384,
-         "jinaai/jina-embeddings-v2-base-code": 768,
-         "intfloat/multilingual-e5-large": 1024,
-         "mixedbread-ai/mxbai-embed-large-v1": 1024,
-     }
-
-     # Default model (fast profile)
-     DEFAULT_MODEL = "BAAI/bge-small-en-v1.5"
-     DEFAULT_PROFILE = "fast"
-
-     def __init__(self, model_name: str | None = None, profile: str | None = None) -> None:
-         """Initialize embedder with model or profile.
-
-         Args:
-             model_name: Explicit model name (e.g., "jinaai/jina-embeddings-v2-base-code")
-             profile: Model profile shortcut ("fast", "code", "multilingual", "balanced")
-                 If both provided, model_name takes precedence.
-         """
-         if not SEMANTIC_AVAILABLE:
-             raise ImportError(
-                 "Semantic search dependencies not available. "
-                 "Install with: pip install codexlens[semantic]"
-             )
-
-         # Resolve model name from profile or use explicit name
-         if model_name:
-             self.model_name = model_name
-         elif profile and profile in self.MODELS:
-             self.model_name = self.MODELS[profile]
-         else:
-             self.model_name = self.DEFAULT_MODEL
-
-         self._model = None
-
-     @property
-     def embedding_dim(self) -> int:
-         """Get embedding dimension for current model."""
-         return self.MODEL_DIMS.get(self.model_name, 768)  # Default to 768 if unknown
-
-     def _load_model(self) -> None:
-         """Lazy load the embedding model."""
-         if self._model is not None:
-             return
-
-         from fastembed import TextEmbedding
-         self._model = TextEmbedding(model_name=self.model_name)
-
-     def embed(self, texts: str | Iterable[str]) -> List[List[float]]:
-         """Generate embeddings for one or more texts.
-
-         Args:
-             texts: Single text or iterable of texts to embed.
-
-         Returns:
-             List of embedding vectors (each is a list of floats).
-
-         Note:
-             This method converts numpy arrays to Python lists for backward compatibility.
-             For memory-efficient processing, use embed_to_numpy() instead.
-         """
-         self._load_model()
-
-         if isinstance(texts, str):
-             texts = [texts]
-         else:
-             texts = list(texts)
-
-         embeddings = list(self._model.embed(texts))
-         return [emb.tolist() for emb in embeddings]
-
-     def embed_to_numpy(self, texts: str | Iterable[str]) -> np.ndarray:
-         """Generate embeddings for one or more texts (returns numpy arrays).
-
-         This method is more memory-efficient than embed() as it avoids converting
-         numpy arrays to Python lists, which can significantly reduce memory usage
-         during batch processing.
-
-         Args:
-             texts: Single text or iterable of texts to embed.
-
-         Returns:
-             numpy.ndarray of shape (n_texts, embedding_dim) containing embeddings.
-         """
-         self._load_model()
-
-         if isinstance(texts, str):
-             texts = [texts]
-         else:
-             texts = list(texts)
-
-         # Return embeddings as numpy array directly (no .tolist() conversion)
-         embeddings = list(self._model.embed(texts))
-         return np.array(embeddings)
-
-     def embed_single(self, text: str) -> List[float]:
-         """Generate embedding for a single text."""
-         return self.embed(text)[0]
+ """Embedder for semantic code search using fastembed.
+
+ Supports GPU acceleration via ONNX execution providers (CUDA, TensorRT, DirectML, ROCm, CoreML).
+ GPU acceleration is automatic when available, with transparent CPU fallback.
+ """
+
+ from __future__ import annotations
+
+ import gc
+ import logging
+ import threading
+ from typing import Dict, Iterable, List, Optional
+
+ import numpy as np
+
+ from . import SEMANTIC_AVAILABLE
+ from .gpu_support import get_optimal_providers, is_gpu_available, get_gpu_summary
+
+ logger = logging.getLogger(__name__)
+
+ # Global embedder cache for singleton pattern
+ _embedder_cache: Dict[str, "Embedder"] = {}
+ _cache_lock = threading.Lock()
+
+
+ def get_embedder(profile: str = "code", use_gpu: bool = True) -> "Embedder":
+     """Get or create a cached Embedder instance (thread-safe singleton).
+
+     This function provides significant performance improvement by reusing
+     Embedder instances across multiple searches, avoiding repeated model
+     loading overhead (~0.8s per load).
+
+     Args:
+         profile: Model profile ("fast", "code", "multilingual", "balanced")
+         use_gpu: If True, use GPU acceleration when available (default: True)
+
+     Returns:
+         Cached Embedder instance for the given profile
+     """
+     global _embedder_cache
+
+     # Cache key includes GPU preference to support mixed configurations
+     cache_key = f"{profile}:{'gpu' if use_gpu else 'cpu'}"
+
+     # Fast path: check cache without lock
+     if cache_key in _embedder_cache:
+         return _embedder_cache[cache_key]
+
+     # Slow path: acquire lock for initialization
+     with _cache_lock:
+         # Double-check after acquiring lock
+         if cache_key in _embedder_cache:
+             return _embedder_cache[cache_key]
+
+         # Create new embedder and cache it
+         embedder = Embedder(profile=profile, use_gpu=use_gpu)
+         # Pre-load model to ensure it's ready
+         embedder._load_model()
+         _embedder_cache[cache_key] = embedder
+
+         # Log GPU status on first embedder creation
+         if use_gpu and is_gpu_available():
+             logger.info(f"Embedder initialized with GPU: {get_gpu_summary()}")
+         elif use_gpu:
+             logger.debug("GPU not available, using CPU for embeddings")
+
+         return embedder
+
+
+ def clear_embedder_cache() -> None:
+     """Clear the embedder cache and release ONNX resources.
+
+     This method ensures proper cleanup of ONNX model resources to prevent
+     memory leaks when embedders are no longer needed.
+     """
+     global _embedder_cache
+     with _cache_lock:
+         # Release ONNX resources before clearing cache
+         for embedder in _embedder_cache.values():
+             if embedder._model is not None:
+                 del embedder._model
+                 embedder._model = None
+         _embedder_cache.clear()
+         gc.collect()
+
+
+ class Embedder:
+     """Generate embeddings for code chunks using fastembed (ONNX-based).
+
+     Supported Model Profiles:
+     - fast: BAAI/bge-small-en-v1.5 (384 dim) - Fast, lightweight, English-optimized
+     - code: jinaai/jina-embeddings-v2-base-code (768 dim) - Code-optimized, best for programming languages
+     - multilingual: intfloat/multilingual-e5-large (1024 dim) - Multilingual + code support
+     - balanced: mixedbread-ai/mxbai-embed-large-v1 (1024 dim) - High accuracy, general purpose
+     """
+
+     # Model profiles for different use cases
+     MODELS = {
+         "fast": "BAAI/bge-small-en-v1.5",  # 384 dim - Fast, lightweight
+         "code": "jinaai/jina-embeddings-v2-base-code",  # 768 dim - Code-optimized
+         "multilingual": "intfloat/multilingual-e5-large",  # 1024 dim - Multilingual
+         "balanced": "mixedbread-ai/mxbai-embed-large-v1",  # 1024 dim - High accuracy
+     }
+
+     # Dimension mapping for each model
+     MODEL_DIMS = {
+         "BAAI/bge-small-en-v1.5": 384,
+         "jinaai/jina-embeddings-v2-base-code": 768,
+         "intfloat/multilingual-e5-large": 1024,
+         "mixedbread-ai/mxbai-embed-large-v1": 1024,
+     }
+
+     # Default model (fast profile)
+     DEFAULT_MODEL = "BAAI/bge-small-en-v1.5"
+     DEFAULT_PROFILE = "fast"
+
+     def __init__(
+         self,
+         model_name: str | None = None,
+         profile: str | None = None,
+         use_gpu: bool = True,
+         providers: List[str] | None = None,
+     ) -> None:
+         """Initialize embedder with model or profile.
+
+         Args:
+             model_name: Explicit model name (e.g., "jinaai/jina-embeddings-v2-base-code")
+             profile: Model profile shortcut ("fast", "code", "multilingual", "balanced")
+                 If both provided, model_name takes precedence.
+             use_gpu: If True, use GPU acceleration when available (default: True)
+             providers: Explicit ONNX providers list (overrides use_gpu if provided)
+         """
+         if not SEMANTIC_AVAILABLE:
+             raise ImportError(
+                 "Semantic search dependencies not available. "
+                 "Install with: pip install codexlens[semantic]"
+             )
+
+         # Resolve model name from profile or use explicit name
+         if model_name:
+             self.model_name = model_name
+         elif profile and profile in self.MODELS:
+             self.model_name = self.MODELS[profile]
+         else:
+             self.model_name = self.DEFAULT_MODEL
+
+         # Configure ONNX execution providers
+         if providers is not None:
+             self._providers = providers
+         else:
+             self._providers = get_optimal_providers(use_gpu=use_gpu)
+
+         self._use_gpu = use_gpu
+         self._model = None
+
+     @property
+     def embedding_dim(self) -> int:
+         """Get embedding dimension for current model."""
+         return self.MODEL_DIMS.get(self.model_name, 768)  # Default to 768 if unknown
+
+     @property
+     def providers(self) -> List[str]:
+         """Get configured ONNX execution providers."""
+         return self._providers
+
+     @property
+     def is_gpu_enabled(self) -> bool:
+         """Check if GPU acceleration is enabled for this embedder."""
+         gpu_providers = {"CUDAExecutionProvider", "TensorrtExecutionProvider",
+                          "DmlExecutionProvider", "ROCMExecutionProvider", "CoreMLExecutionProvider"}
+         return any(p in gpu_providers for p in self._providers)
+
+     def _load_model(self) -> None:
+         """Lazy load the embedding model with configured providers."""
+         if self._model is not None:
+             return
+
+         from fastembed import TextEmbedding
+
+         # fastembed supports 'providers' parameter for ONNX execution providers
+         try:
+             self._model = TextEmbedding(
+                 model_name=self.model_name,
+                 providers=self._providers,
+             )
+             logger.debug(f"Model loaded with providers: {self._providers}")
+         except TypeError:
+             # Fallback for older fastembed versions without providers parameter
+             logger.warning(
+                 "fastembed version doesn't support 'providers' parameter. "
+                 "Upgrade fastembed for GPU acceleration: pip install --upgrade fastembed"
+             )
+             self._model = TextEmbedding(model_name=self.model_name)
+
+     def embed(self, texts: str | Iterable[str]) -> List[List[float]]:
+         """Generate embeddings for one or more texts.
+
+         Args:
+             texts: Single text or iterable of texts to embed.
+
+         Returns:
+             List of embedding vectors (each is a list of floats).
+
+         Note:
+             This method converts numpy arrays to Python lists for backward compatibility.
+             For memory-efficient processing, use embed_to_numpy() instead.
+         """
+         self._load_model()
+
+         if isinstance(texts, str):
+             texts = [texts]
+         else:
+             texts = list(texts)
+
+         embeddings = list(self._model.embed(texts))
+         return [emb.tolist() for emb in embeddings]
+
+     def embed_to_numpy(self, texts: str | Iterable[str]) -> np.ndarray:
+         """Generate embeddings for one or more texts (returns numpy arrays).
+
+         This method is more memory-efficient than embed() as it avoids converting
+         numpy arrays to Python lists, which can significantly reduce memory usage
+         during batch processing.
+
+         Args:
+             texts: Single text or iterable of texts to embed.
+
+         Returns:
+             numpy.ndarray of shape (n_texts, embedding_dim) containing embeddings.
+         """
+         self._load_model()
+
+         if isinstance(texts, str):
+             texts = [texts]
+         else:
+             texts = list(texts)
+
+         # Return embeddings as numpy array directly (no .tolist() conversion)
+         embeddings = list(self._model.embed(texts))
+         return np.array(embeddings)
+
+     def embed_single(self, text: str) -> List[float]:
+         """Generate embedding for a single text."""
+         return self.embed(text)[0]
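
A minimal usage sketch of the updated embedder API, based only on the functions shown in the diff above. This is not part of the published package content: the import path assumes the package is installed as codexlens, and the sample string is illustrative.

from codexlens.semantic.embedder import get_embedder, clear_embedder_cache

# Cached, GPU-preferring embedder for the code-optimized profile
embedder = get_embedder(profile="code", use_gpu=True)
print(embedder.providers, embedder.is_gpu_enabled)

# Batch embedding; shape is (n_texts, 768) for the jina code model
vectors = embedder.embed_to_numpy(["def add(a, b): return a + b"])
print(vectors.shape)

clear_embedder_cache()  # release cached ONNX models when done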