cosma-backend 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (39)
  1. cosma_backend/__init__.py +14 -0
  2. cosma_backend/__main__.py +4 -0
  3. cosma_backend/api/__init__.py +29 -0
  4. cosma_backend/api/files.py +154 -0
  5. cosma_backend/api/index.py +114 -0
  6. cosma_backend/api/models.py +28 -0
  7. cosma_backend/api/search.py +166 -0
  8. cosma_backend/api/status.py +28 -0
  9. cosma_backend/api/updates.py +67 -0
  10. cosma_backend/api/watch.py +156 -0
  11. cosma_backend/app.py +192 -0
  12. cosma_backend/db/__init__.py +2 -0
  13. cosma_backend/db/database.py +638 -0
  14. cosma_backend/discoverer/__init__.py +1 -0
  15. cosma_backend/discoverer/discoverer.py +34 -0
  16. cosma_backend/embedder/__init__.py +1 -0
  17. cosma_backend/embedder/embedder.py +637 -0
  18. cosma_backend/logging.py +73 -0
  19. cosma_backend/models/__init__.py +3 -0
  20. cosma_backend/models/file.py +169 -0
  21. cosma_backend/models/status.py +10 -0
  22. cosma_backend/models/update.py +202 -0
  23. cosma_backend/models/watch.py +132 -0
  24. cosma_backend/pipeline/__init__.py +2 -0
  25. cosma_backend/pipeline/pipeline.py +222 -0
  26. cosma_backend/schema.sql +319 -0
  27. cosma_backend/searcher/__init__.py +1 -0
  28. cosma_backend/searcher/searcher.py +397 -0
  29. cosma_backend/summarizer/__init__.py +44 -0
  30. cosma_backend/summarizer/summarizer.py +1075 -0
  31. cosma_backend/utils/bundled.py +24 -0
  32. cosma_backend/utils/pubsub.py +31 -0
  33. cosma_backend/utils/sse.py +92 -0
  34. cosma_backend/watcher/__init__.py +1 -0
  35. cosma_backend/watcher/awatchdog.py +80 -0
  36. cosma_backend/watcher/watcher.py +257 -0
  37. cosma_backend-0.1.0.dist-info/METADATA +23 -0
  38. cosma_backend-0.1.0.dist-info/RECORD +39 -0
  39. cosma_backend-0.1.0.dist-info/WHEEL +4 -0
@@ -0,0 +1,637 @@
1
+ #!/usr/bin/env python
2
+ """
3
+ @File : embedder.py
4
+ @Time : 2025/07/14
5
+ @Author :
6
+ @Version : 1.0
7
+ @Contact :
8
+ @License :
9
+ @Desc : Embedding generation for semantic search
10
+ """
11
+
12
+ from datetime import datetime, timezone
13
+ import os
14
+ import asyncio
15
+ from abc import ABC, abstractmethod
16
+ from typing import Any
17
+
18
+ # Import AI libraries
19
+ import litellm
20
+ import numpy as np
21
+ from backend.models import File
22
+ import logging
23
+ from backend.logging import sm
24
+ from backend.models.status import ProcessingStatus
25
+
26
+ # Configure structured logger
27
+ logger = logging.getLogger(__name__)
28
+
29
+
30
class EmbedderError(Exception):
    """Root of the embedder exception hierarchy; all embedder errors derive from this."""
+
33
+
34
class EmbeddingProviderError(EmbedderError):
    """Raised when a specific embedding provider fails or is unavailable."""
+
37
+
38
+ class BaseEmbedder(ABC):
39
+ """Abstract base class for text embedders."""
40
+
41
+ def __init__(self, model_name: str, dimensions: int) -> None:
42
+ """
43
+ Initialize embedder with model specifications.
44
+
45
+ Args:
46
+ model_name: Name of the embedding model
47
+ dimensions: Dimension of the output embeddings
48
+ """
49
+ self.model_name = model_name
50
+ self.dimensions = dimensions
51
+ logger.info(sm("Initializing embedder", model=model_name, dimensions=dimensions))
52
+
53
+ @abstractmethod
54
+ def embed_text(self, text: str | list[str]) -> np.ndarray:
55
+ """
56
+ Generate embeddings for text input.
57
+
58
+ Args:
59
+ text: Single text or list of texts to embed
60
+
61
+ Returns:
62
+ Numpy array of embeddings (single vector or matrix)
63
+ """
64
+
65
+ async def embed_text_async(self, text: str | list[str]) -> np.ndarray:
66
+ """
67
+ Async version of embed_text that runs in a thread pool.
68
+
69
+ Args:
70
+ text: Single text or list of texts to embed
71
+
72
+ Returns:
73
+ Numpy array of embeddings (single vector or matrix)
74
+ """
75
+ # Check if this is an OnlineEmbedder with direct async support
76
+ if hasattr(self, '_embed_text_async'):
77
+ return await self._embed_text_async(text)
78
+
79
+ # Fallback to thread pool for legacy implementations
80
+ loop = asyncio.get_event_loop()
81
+ return await loop.run_in_executor(None, self.embed_text, text)
82
+
83
+ @abstractmethod
84
+ def is_available(self) -> bool:
85
+ """Check if this embedder is available for use."""
86
+
87
+ def _validate_text(self, text: str | list[str]) -> list[str]:
88
+ """Validate and normalize text input."""
89
+ texts = [text] if isinstance(text, str) else text
90
+
91
+ # Filter out empty texts
92
+ valid_texts = [t for t in texts if t and t.strip()]
93
+
94
+ if not valid_texts:
95
+ msg = "No valid text provided for embedding"
96
+ raise ValueError(msg)
97
+
98
+ return valid_texts
99
+
100
+
101
class OnlineEmbedder(BaseEmbedder):
    """Embedder using online models via LiteLLM (OpenAI API)."""

    def __init__(self, model: str | None = None, api_key: str | None = None, dimensions: int | None = None) -> None:
        """
        Initialize online embedder.

        Args:
            model: Model name (default: text-embedding-3-small)
            api_key: API key (default from env)
            dimensions: Embedding dimensions (default: 512 for efficiency)

        Raises:
            ValueError: If dimensions are out of range for the chosen model.
        """
        # Default to text-embedding-3-small with 512 dimensions for efficiency
        self.model = model or os.getenv("EMBEDDING_MODEL", "text-embedding-3-small")
        self.configured_dimensions = dimensions or int(os.getenv("EMBEDDING_DIMENSIONS", "512"))

        # Initialize base class
        super().__init__(model_name=self.model, dimensions=self.configured_dimensions)

        # Set API key if provided (litellm reads it from the environment)
        if api_key:
            os.environ["OPENAI_API_KEY"] = api_key

        # Validate model compatibility
        if self.model == "text-embedding-3-small":
            if not (512 <= self.configured_dimensions <= 1536):
                msg = f"Dimensions must be between 512 and 1536 for {self.model}"
                raise ValueError(msg)

        logger.info(sm("Online embedder initialized",
                       model=self.model,
                       dimensions=self.configured_dimensions))

    def is_available(self) -> bool:
        """Check if online embedder is available (API key present in the environment)."""
        return bool(os.getenv("OPENAI_API_KEY"))

    def _to_array(self, response, single: bool) -> np.ndarray:
        """Convert a litellm embedding response into a float32 numpy array.

        Args:
            response: The litellm embedding response (has a ``data`` list of
                items with an ``"embedding"`` entry).
            single: True if the caller passed a single string; return one
                vector instead of a matrix.
        """
        embeddings = [item["embedding"] for item in response.data]
        embeddings_array = np.array(embeddings, dtype=np.float32)
        # Return single vector if input was single text
        return embeddings_array[0] if single else embeddings_array

    def embed_text(self, text: str | list[str]) -> np.ndarray:
        """Generate embeddings using online model.

        Raises:
            EmbeddingProviderError: If the provider call or parsing fails.
        """
        texts = self._validate_text(text)

        logger.debug(sm("Generating embeddings",
                        model=self.model,
                        num_texts=len(texts),
                        dimensions=self.configured_dimensions))

        try:
            # Call litellm embedding endpoint
            response = litellm.embedding(
                model=self.model,
                input=texts,
                dimensions=self.configured_dimensions,  # Only for models that support it
                timeout=60,
                max_retries=2
            )
            return self._to_array(response, single=isinstance(text, str))

        except Exception as e:
            error_msg = f"Online embedding generation failed: {e!s}"
            logger.exception(sm(error_msg, model=self.model))
            # Chain the original exception for easier debugging
            raise EmbeddingProviderError(error_msg) from e

    async def _embed_text_async(self, text: str | list[str]) -> np.ndarray:
        """Truly async embedding generation using litellm async API.

        Raises:
            EmbeddingProviderError: If the provider call or parsing fails.
        """
        texts = self._validate_text(text)

        logger.debug(sm("Generating embeddings async",
                        model=self.model,
                        num_texts=len(texts),
                        dimensions=self.configured_dimensions))

        try:
            # Call async litellm embedding endpoint
            response = await litellm.aembedding(
                model=self.model,
                input=texts,
                dimensions=self.configured_dimensions,
                timeout=60,
                max_retries=2
            )
            return self._to_array(response, single=isinstance(text, str))

        except Exception as e:
            error_msg = f"Async online embedding generation failed: {e!s}"
            logger.exception(sm(error_msg, model=self.model))
            # Chain the original exception for easier debugging
            raise EmbeddingProviderError(error_msg) from e
213
+
214
+
215
class LocalEmbedder(BaseEmbedder):
    """Embedder using local models via sentence-transformers."""

    def __init__(self, model_name: str | None = None, dimensions: int | None = None) -> None:
        """
        Initialize local embedder.

        Args:
            model_name: Model name (default: Qwen/Qwen3-Embedding-0.6B)
            dimensions: Embedding dimensions (default: 768 for efficiency)

        Raises:
            ValueError: If dimensions are out of range for the chosen model.
        """
        # Import sentence-transformers lazily so the dependency stays optional
        try:
            from sentence_transformers import SentenceTransformer
            self.sentence_transformers_available = True
        except ImportError:
            self.sentence_transformers_available = False
            logger.warning(sm("sentence-transformers not installed, local embeddings unavailable"))

        # Default to Qwen model with 768 dimensions for efficiency
        self.model_name = model_name or os.getenv("LOCAL_EMBEDDING_MODEL", "Qwen/Qwen3-Embedding-0.6B")
        self.configured_dimensions = dimensions or int(os.getenv("LOCAL_EMBEDDING_DIMENSIONS", "768"))

        # Initialize base class
        super().__init__(model_name=self.model_name, dimensions=self.configured_dimensions)

        # Validate model compatibility
        if "Qwen3-Embedding" in self.model_name:
            if not (32 <= self.configured_dimensions <= 1024):
                msg = f"Dimensions must be between 32 and 1024 for {self.model_name}"
                raise ValueError(msg)

        # Initialize model if available
        self.model = None
        if self.sentence_transformers_available:
            try:
                logger.info(sm("Loading local embedding model",
                               model=self.model_name,
                               dimensions=self.configured_dimensions))
                # This will automatically download from HuggingFace if model doesn't exist locally
                self.model = SentenceTransformer(self.model_name)
                logger.info(sm("Local embedder initialized",
                               model=self.model_name,
                               dimensions=self.configured_dimensions))
            except Exception as e:
                logger.exception(sm("Failed to load local model",
                                    model=self.model_name,
                                    error=str(e)))
                logger.info(sm("Model will be downloaded from HuggingFace on first use"))

    def is_available(self) -> bool:
        """Check if local embedder is available (library installed and model loaded)."""
        return self.sentence_transformers_available and self.model is not None

    def embed_text(self, text: str | list[str]) -> np.ndarray:
        """Generate embeddings using local model.

        Raises:
            EmbeddingProviderError: If the embedder is unavailable or encoding fails.
        """
        if not self.is_available():
            msg = "Local embedder not available"
            raise EmbeddingProviderError(msg)

        texts = self._validate_text(text)

        logger.debug(sm("Generating local embeddings",
                        model=self.model_name,
                        num_texts=len(texts),
                        dimensions=self.configured_dimensions))

        try:
            # Generate embeddings
            embeddings = self.model.encode(
                texts,
                normalize_embeddings=True,
                show_progress_bar=False
            )

            # Truncate to configured dimensions if needed.
            # NOTE(review): truncating normalized vectors leaves them
            # non-unit-norm; fine for matryoshka-style models, but confirm
            # downstream similarity code does not assume normalized output.
            if embeddings.shape[1] > self.configured_dimensions:
                embeddings = embeddings[:, :self.configured_dimensions]

            # Convert to float32
            embeddings = embeddings.astype(np.float32)

            # Return single vector if input was single text
            if isinstance(text, str):
                return embeddings[0]

            return embeddings

        except Exception as e:
            error_msg = f"Local embedding generation failed: {e!s}"
            logger.exception(sm(error_msg, model=self.model_name))
            # Chain the original exception for easier debugging
            raise EmbeddingProviderError(error_msg) from e
307
+
308
+
309
class AutoEmbedder:
    """
    Automatic embedder that selects the best available provider.

    Tries providers in order of preference:
    1. User-specified provider
    2. Online models (OpenAI)
    3. Local models (fallback)
    """

    def __init__(self, preferred_provider: str | None = None) -> None:
        """
        Initialize auto embedder.

        Args:
            preferred_provider: Preferred provider ('local', 'online')
        """
        self.preferred_provider = preferred_provider or os.getenv("EMBEDDING_PROVIDER", "local")
        logger.debug(sm("Preferred provider", og=preferred_provider, provider=self.preferred_provider))
        # Cache of instantiated embedders, keyed by "online" / "local"
        self.embedders = {}

        logger.info(sm("AutoEmbedder initializing",
                       preferred_provider=self.preferred_provider))

        # Eagerly initialize models based on preferred provider
        self._eagerly_initialize_models()

    def _eagerly_initialize_models(self) -> None:
        """Initialize embedding models based on provider preference - eager for local, lazy for online."""
        logger.info(sm("Initializing embedding models"))

        if self.preferred_provider == "local":
            # Eagerly initialize local embedder for local preference
            logger.info(sm("Eagerly loading local embedding models"))
            local_embedder = self._get_local_embedder()
            if local_embedder:
                logger.info(sm("Local embedder ready",
                               model=local_embedder.model_name,
                               dimensions=local_embedder.dimensions))
            else:
                logger.warning(sm("Local embedder failed to initialize"))

            # Check online availability but don't initialize (lazy loading)
            logger.info(sm("Checking online embedding provider availability (lazy loading)"))
            online_available = self._check_online_availability()
            if online_available:
                logger.info(sm("Online embedder available as fallback (will load on first use)"))
            else:
                logger.warning(sm("Online embedder not available - check API keys"))
        else:
            # For online preference, check availability but don't initialize (lazy loading)
            logger.info(sm("Checking online embedding provider availability (lazy loading)"))
            online_available = self._check_online_availability()
            if online_available:
                logger.info(sm("Online embedder ready (will load on first use)",
                               provider="online"))
            else:
                logger.warning(sm("Online embedder not available - check API keys"))

            # Skip local model initialization when user explicitly chose online
            logger.info(sm("Skipping local embedding model initialization (online provider preferred)"))
            logger.info(sm("To use local models as fallback, set EMBEDDING_PROVIDER=local"))

        # Summary of initialization strategy
        if self.preferred_provider == "local":
            logger.info(sm("AutoEmbedder configured: LOCAL models preloaded, ONLINE models lazy-loaded"))
        else:
            logger.info(sm("AutoEmbedder configured: ONLINE models only (LOCAL models skipped)"))

        logger.info(sm("AutoEmbedder initialization complete",
                       preferred_provider=self.preferred_provider,
                       strategy="eager_local_lazy_online" if self.preferred_provider == "local" else "online_only"))

    def _check_online_availability(self) -> bool:
        """Check if online embedder is available without initializing it."""
        try:
            return bool(os.getenv("OPENAI_API_KEY"))
        except Exception:
            return False

    def _check_local_availability(self) -> bool:
        """Check if local embedder is available without initializing it."""
        try:
            import importlib.util
            return importlib.util.find_spec("sentence_transformers") is not None
        except Exception:
            return False

    def _get_online_embedder(self) -> OnlineEmbedder | None:
        """Get or create online embedder if available (cached after first call)."""
        if "online" not in self.embedders:
            try:
                embedder = OnlineEmbedder()
                if embedder.is_available():
                    self.embedders["online"] = embedder
                    logger.info(sm("Online embedder available"))
                else:
                    logger.debug(sm("Online embedder not available"))
                    return None
            except Exception as e:
                logger.debug(sm("Failed to create online embedder", error=str(e)))
                return None

        return self.embedders.get("online")

    def _get_local_embedder(self) -> LocalEmbedder | None:
        """Get or create local embedder if available (cached after first call)."""
        if "local" not in self.embedders:
            try:
                embedder = LocalEmbedder()
                if embedder.is_available():
                    self.embedders["local"] = embedder
                    logger.info(sm("Local embedder available"))
                else:
                    logger.debug(sm("Local embedder not available"))
                    return None
            except Exception as e:
                logger.warning(sm("Failed to create local embedder", error=str(e)))
                return None

        return self.embedders.get("local")

    def _candidate_embedders(self) -> list:
        """Build the ordered fallback chain of embedders for the current preference.

        Shared by the sync and async embed paths so the preference logic lives
        in exactly one place. Entries may be None when a provider is unavailable.
        """
        if self.preferred_provider == "online":
            # When online is explicitly preferred, don't initialize local models as fallback
            return [self._get_online_embedder()]
        # "local" preference and the default both try local first, then online
        return [self._get_local_embedder(), self._get_online_embedder()]

    def embed_text(self, text: str | list[str]) -> np.ndarray:
        """
        Generate embeddings using the best available provider.

        Args:
            text: Text or list of texts to embed

        Returns:
            Numpy array of embeddings

        Raises:
            EmbedderError: If no embedders are available or all fail
        """
        providers = self._candidate_embedders()

        logger.debug(sm("All available providers", providers=providers))

        # Try each provider in order, falling through on failure
        for embedder in providers:
            if embedder:
                try:
                    logger.info(sm("Attempting embedding generation",
                                   provider=type(embedder).__name__))
                    return embedder.embed_text(text)
                except Exception as e:
                    logger.warning(sm("Embedder failed, trying next provider",
                                      provider=type(embedder).__name__,
                                      error=str(e)))
                    continue

        error_msg = "All embedding providers failed or are unavailable"
        logger.error(sm(error_msg, preferred_provider=self.preferred_provider))
        raise EmbedderError(error_msg)

    async def embed_text_async(self, text: str | list[str]) -> np.ndarray:
        """
        Async version of embed_text with fallback providers.

        Args:
            text: Text or list of texts to embed

        Returns:
            Numpy array of embeddings

        Raises:
            EmbedderError: If no embedders are available or all fail
        """
        providers = self._candidate_embedders()

        # Try each provider in order, falling through on failure
        for embedder in providers:
            if embedder:
                try:
                    logger.info(sm("Attempting async embedding generation",
                                   provider=type(embedder).__name__))
                    return await embedder.embed_text_async(text)
                except Exception as e:
                    logger.warning(sm("Async embedder failed, trying next provider",
                                      provider=type(embedder).__name__,
                                      error=str(e)))
                    continue

        error_msg = "All embedding providers failed or are unavailable"
        logger.error(sm(error_msg, preferred_provider=self.preferred_provider))
        raise EmbedderError(error_msg)

    def get_model_info(self) -> dict[str, Any]:
        """Get information about the currently available model.

        Returns a dict with "provider", "model", and "dimensions" keys; all
        values are None when no provider is available.
        """
        def _info(provider: str, embedder) -> dict[str, Any]:
            # Shared shape for every successful branch
            return {
                "provider": provider,
                "model": embedder.model_name,
                "dimensions": embedder.dimensions
            }

        if self.preferred_provider == "online":
            embedder = self._get_online_embedder()
            if embedder:
                return _info("online", embedder)
        if self.preferred_provider == "local":
            embedder = self._get_local_embedder()
            if embedder:
                return _info("local", embedder)
        # Auto mode - return first available (respecting online-only preference)
        embedder = self._get_online_embedder()
        if embedder:
            return _info("online", embedder)
        # Only try local if not explicitly using online-only
        if self.preferred_provider != "online":
            embedder = self._get_local_embedder()
            if embedder:
                return _info("local", embedder)

        return {
            "provider": None,
            "model": None,
            "dimensions": None
        }

    def get_available_providers(self) -> list[str]:
        """Get list of available providers (respects online-only preference)."""
        providers = []

        if self._get_online_embedder():
            providers.append("online")

        # Only check local if not explicitly using online-only
        if self.preferred_provider != "online" and self._get_local_embedder():
            providers.append("local")

        return providers

    def _prepare_embedding_text(self, file: File) -> str:
        """
        Prepare text for embedding generation.

        Concatenates the file's title, summary, and keywords (each optional)
        into a single space-joined string.

        Args:
            file: File metadata to prepare text from

        Returns:
            Text prepared for embedding
        """
        parts = []

        # Add title
        if file.title:
            parts.append(f"Title: {file.title}")

        # Add summary
        if file.summary:
            parts.append(f"Summary: {file.summary}")

        # Add keywords
        if file.keywords:
            parts.append(f"Keywords: {', '.join(file.keywords)}")

        # Add content (truncated)
        # if file.content:
        #     content = file.content[:1000]  # Limit content length
        #     parts.append(f"Content: {content}")

        return " ".join(parts)

    async def embed(self, file: File) -> None:
        """Generate and attach an embedding to *file*.

        Builds the embedding text from the file's metadata, embeds it via the
        best available provider, records the embedding plus model metadata and
        timestamp on the file, and marks it COMPLETE.

        Raises:
            EmbedderError: If all providers fail (propagated from embed_text_async).
        """
        embedding_text = self._prepare_embedding_text(file)
        embedding = await self.embed_text_async(embedding_text)

        model_info = self.get_model_info()

        file.embedding = embedding
        file.embedding_model = model_info["model"]
        file.embedding_dimensions = model_info["dimensions"]
        file.embedded_at = datetime.now(timezone.utc)

        file.status = ProcessingStatus.COMPLETE
609
+
610
+
611
+ # Convenience functions for easier usage
612
def generate_embedding(text: str | list[str],
                       provider: str | None = None) -> np.ndarray:
    """
    Convenience function to generate embeddings.

    Args:
        text: Text or list of texts to embed
        provider: Preferred embedding provider

    Returns:
        Numpy array of embeddings
    """
    # One-shot AutoEmbedder: build, embed, discard.
    return AutoEmbedder(preferred_provider=provider).embed_text(text)
626
+
627
+
628
def get_available_embedders() -> list[str]:
    """Get list of available embedding providers."""
    # Delegate to a throwaway AutoEmbedder instance
    return AutoEmbedder().get_available_providers()
632
+
633
+
634
def get_embedder_info() -> dict[str, Any]:
    """Get information about the current embedder configuration."""
    # Delegate to a throwaway AutoEmbedder instance
    return AutoEmbedder().get_model_info()