mdb-engine 0.1.6__py3-none-any.whl → 0.4.12__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (92)
  1. mdb_engine/__init__.py +116 -11
  2. mdb_engine/auth/ARCHITECTURE.md +112 -0
  3. mdb_engine/auth/README.md +654 -11
  4. mdb_engine/auth/__init__.py +136 -29
  5. mdb_engine/auth/audit.py +592 -0
  6. mdb_engine/auth/base.py +252 -0
  7. mdb_engine/auth/casbin_factory.py +265 -70
  8. mdb_engine/auth/config_defaults.py +5 -5
  9. mdb_engine/auth/config_helpers.py +19 -18
  10. mdb_engine/auth/cookie_utils.py +12 -16
  11. mdb_engine/auth/csrf.py +483 -0
  12. mdb_engine/auth/decorators.py +10 -16
  13. mdb_engine/auth/dependencies.py +69 -71
  14. mdb_engine/auth/helpers.py +3 -3
  15. mdb_engine/auth/integration.py +61 -88
  16. mdb_engine/auth/jwt.py +11 -15
  17. mdb_engine/auth/middleware.py +79 -35
  18. mdb_engine/auth/oso_factory.py +21 -41
  19. mdb_engine/auth/provider.py +270 -171
  20. mdb_engine/auth/rate_limiter.py +505 -0
  21. mdb_engine/auth/restrictions.py +21 -36
  22. mdb_engine/auth/session_manager.py +24 -41
  23. mdb_engine/auth/shared_middleware.py +977 -0
  24. mdb_engine/auth/shared_users.py +775 -0
  25. mdb_engine/auth/token_lifecycle.py +10 -12
  26. mdb_engine/auth/token_store.py +17 -32
  27. mdb_engine/auth/users.py +99 -159
  28. mdb_engine/auth/utils.py +236 -42
  29. mdb_engine/cli/commands/generate.py +546 -10
  30. mdb_engine/cli/commands/validate.py +3 -7
  31. mdb_engine/cli/utils.py +7 -7
  32. mdb_engine/config.py +13 -28
  33. mdb_engine/constants.py +65 -0
  34. mdb_engine/core/README.md +117 -6
  35. mdb_engine/core/__init__.py +39 -7
  36. mdb_engine/core/app_registration.py +31 -50
  37. mdb_engine/core/app_secrets.py +289 -0
  38. mdb_engine/core/connection.py +20 -12
  39. mdb_engine/core/encryption.py +222 -0
  40. mdb_engine/core/engine.py +2862 -115
  41. mdb_engine/core/index_management.py +12 -16
  42. mdb_engine/core/manifest.py +628 -204
  43. mdb_engine/core/ray_integration.py +436 -0
  44. mdb_engine/core/seeding.py +13 -21
  45. mdb_engine/core/service_initialization.py +20 -30
  46. mdb_engine/core/types.py +40 -43
  47. mdb_engine/database/README.md +140 -17
  48. mdb_engine/database/__init__.py +17 -6
  49. mdb_engine/database/abstraction.py +37 -50
  50. mdb_engine/database/connection.py +51 -30
  51. mdb_engine/database/query_validator.py +367 -0
  52. mdb_engine/database/resource_limiter.py +204 -0
  53. mdb_engine/database/scoped_wrapper.py +747 -237
  54. mdb_engine/dependencies.py +427 -0
  55. mdb_engine/di/__init__.py +34 -0
  56. mdb_engine/di/container.py +247 -0
  57. mdb_engine/di/providers.py +206 -0
  58. mdb_engine/di/scopes.py +139 -0
  59. mdb_engine/embeddings/README.md +54 -24
  60. mdb_engine/embeddings/__init__.py +31 -24
  61. mdb_engine/embeddings/dependencies.py +38 -155
  62. mdb_engine/embeddings/service.py +78 -75
  63. mdb_engine/exceptions.py +104 -12
  64. mdb_engine/indexes/README.md +30 -13
  65. mdb_engine/indexes/__init__.py +1 -0
  66. mdb_engine/indexes/helpers.py +11 -11
  67. mdb_engine/indexes/manager.py +59 -123
  68. mdb_engine/memory/README.md +95 -4
  69. mdb_engine/memory/__init__.py +1 -2
  70. mdb_engine/memory/service.py +363 -1168
  71. mdb_engine/observability/README.md +4 -2
  72. mdb_engine/observability/__init__.py +26 -9
  73. mdb_engine/observability/health.py +17 -17
  74. mdb_engine/observability/logging.py +10 -10
  75. mdb_engine/observability/metrics.py +40 -19
  76. mdb_engine/repositories/__init__.py +34 -0
  77. mdb_engine/repositories/base.py +325 -0
  78. mdb_engine/repositories/mongo.py +233 -0
  79. mdb_engine/repositories/unit_of_work.py +166 -0
  80. mdb_engine/routing/README.md +1 -1
  81. mdb_engine/routing/__init__.py +1 -3
  82. mdb_engine/routing/websockets.py +41 -75
  83. mdb_engine/utils/__init__.py +3 -1
  84. mdb_engine/utils/mongo.py +117 -0
  85. mdb_engine-0.4.12.dist-info/METADATA +492 -0
  86. mdb_engine-0.4.12.dist-info/RECORD +97 -0
  87. {mdb_engine-0.1.6.dist-info → mdb_engine-0.4.12.dist-info}/WHEEL +1 -1
  88. mdb_engine-0.1.6.dist-info/METADATA +0 -213
  89. mdb_engine-0.1.6.dist-info/RECORD +0 -75
  90. {mdb_engine-0.1.6.dist-info → mdb_engine-0.4.12.dist-info}/entry_points.txt +0 -0
  91. {mdb_engine-0.1.6.dist-info → mdb_engine-0.4.12.dist-info}/licenses/LICENSE +0 -0
  92. {mdb_engine-0.1.6.dist-info → mdb_engine-0.4.12.dist-info}/top_level.txt +0 -0
mdb_engine/embeddings/dependencies.py

@@ -1,97 +1,60 @@
  """
- Embedding Service Dependency Injection for FastAPI
+ Embedding Service Utilities
 
- This module provides FastAPI dependency functions to inject embedding services
- into route handlers. The embedding service is automatically initialized from
- the app's manifest.json configuration.
- """
+ This module provides utility functions for creating embedding services.
+ For FastAPI dependency injection, use the request-scoped dependencies
+ from `mdb_engine.dependencies` instead.
 
- from typing import Any, Optional
+ Usage:
+     # For FastAPI routes (RECOMMENDED):
+     from mdb_engine.dependencies import get_embedding_service
 
- # Optional FastAPI import (only needed if FastAPI is available)
- try:
-     from fastapi import Depends, HTTPException
+     @app.post("/embed")
+     async def embed(embedding_service=Depends(get_embedding_service)):
+         ...
 
-     FASTAPI_AVAILABLE = True
- except ImportError:
-     FASTAPI_AVAILABLE = False
+     # For standalone/utility usage:
+     from mdb_engine.embeddings.dependencies import get_embedding_service_for_app
 
-     # Stub for when FastAPI is not available
-     def Depends(*args, **kwargs):
-         return None
+     service = get_embedding_service_for_app("my_app", engine)
+ """
 
-     class HTTPException(Exception):
-         pass
+ from typing import TYPE_CHECKING
 
+ if TYPE_CHECKING:
+     from ..core.engine import MongoDBEngine
 
  from .service import EmbeddingService, get_embedding_service
 
- # Global engine registry (for apps that don't pass engine explicitly)
- _global_engine: Optional[Any] = None
- _global_app_slug: Optional[str] = None
-
-
- def set_global_engine(engine: Any, app_slug: Optional[str] = None) -> None:
-     """
-     Set global MongoDBEngine instance for embedding dependency injection.
-
-     This is useful when you have a single engine instance that you want
-     to use across all apps. Call this during application startup.
-
-     Args:
-         engine: MongoDBEngine instance
-         app_slug: Optional app slug
-     """
-     global _global_engine, _global_app_slug
-     _global_engine = engine
-     _global_app_slug = app_slug
-
-
- def get_global_engine() -> Optional[Any]:
-     """
-     Get global MongoDBEngine instance.
-
-     Returns:
-         MongoDBEngine instance if set, None otherwise
-     """
-     return _global_engine
-
 
  def get_embedding_service_for_app(
-     app_slug: str, engine: Optional[Any] = None
- ) -> Optional[EmbeddingService]:
+     app_slug: str, engine: "MongoDBEngine"
+ ) -> EmbeddingService | None:
      """
-     Get embedding service for a specific app.
+     Get embedding service for a specific app using the engine instance.
 
-     This is a helper function that can be used with FastAPI's Depends()
-     to inject the embedding service into route handlers.
+     This is a utility function for cases where you need to create an
+     embedding service outside of a FastAPI request context (e.g., in
+     background tasks, CLI tools, or tests).
+
+     For FastAPI routes, use `mdb_engine.dependencies.get_embedding_service` instead.
 
      Args:
-         app_slug: App slug (typically extracted from route context)
-         engine: MongoDBEngine instance (optional, will try to get from context)
+         app_slug: App slug to get embedding config from
+         engine: MongoDBEngine instance
 
      Returns:
-         EmbeddingService instance if embedding is enabled for this app, None otherwise
+         EmbeddingService instance if embedding is enabled, None otherwise
 
      Example:
-         ```python
-         from fastapi import Depends
-         from mdb_engine.embeddings.dependencies import get_embedding_service_for_app
-
-         @app.post("/embed")
-         async def embed_endpoint(
-             embedding_service = Depends(lambda: get_embedding_service_for_app("my_app"))
-         ):
-             if not embedding_service:
-                 raise HTTPException(503, "Embedding service not available")
-             embeddings = await embedding_service.embed_chunks(["Hello world"])
-             return {"embeddings": embeddings}
-         ```
-     """
-     # Try to get engine from context if not provided
-     if engine is None:
-         engine = _global_engine
+         # In a background task or CLI
+         engine = MongoDBEngine(...)
+         await engine.initialize()
 
+         service = get_embedding_service_for_app("my_app", engine)
+         if service:
+             embeddings = await service.embed_chunks(["Hello world"])
+     """
      if engine is None:
          return None
 
@@ -108,86 +71,6 @@ def get_embedding_service_for_app(
      return get_embedding_service(config=embedding_config)
 
 
- def create_embedding_dependency(app_slug: str, engine: Optional[Any] = None):
-     """
-     Create a FastAPI dependency function for embedding service.
-
-     This creates a dependency function that can be used with Depends()
-     to inject the embedding service into route handlers.
-
-     Args:
-         app_slug: App slug
-         engine: MongoDBEngine instance (optional)
-
-     Returns:
-         Dependency function that returns EmbeddingService or raises HTTPException
-
-     Example:
-         ```python
-         from fastapi import Depends
-         from mdb_engine.embeddings.dependencies import create_embedding_dependency
-
-         embedding_dep = create_embedding_dependency("my_app", engine)
-
-         @app.post("/embed")
-         async def embed_endpoint(embedding_service = Depends(embedding_dep)):
-             embeddings = await embedding_service.embed_chunks(["Hello world"])
-             return {"embeddings": embeddings}
-         ```
-     """
-
-     def _get_embedding_service() -> EmbeddingService:
-         embedding_service = get_embedding_service_for_app(app_slug, engine)
-         if embedding_service is None:
-             if FASTAPI_AVAILABLE:
-                 raise HTTPException(
-                     status_code=503,
-                     detail=f"Embedding service not available for app '{app_slug}'. "
-                     "Ensure 'embedding_config.enabled' is true in manifest.json and "
-                     "embedding dependencies are installed.",
-                 )
-             else:
-                 raise RuntimeError(
-                     f"Embedding service not available for app '{app_slug}'"
-                 )
-         return embedding_service
-
-     return _get_embedding_service
-
-
- def get_embedding_service_dependency(app_slug: str):
-     """
-     Get embedding service dependency using global engine.
-
-     This is a convenience function that uses the global engine registry.
-     Set the engine with set_global_engine() during app startup.
-
-     Args:
-         app_slug: App slug
-
-     Returns:
-         Dependency function for FastAPI Depends()
-
-     Example:
-         ```python
-         from fastapi import FastAPI, Depends
-         from mdb_engine.embeddings.dependencies import (
-             set_global_engine, get_embedding_service_dependency
-         )
-
-         app = FastAPI()
-
-         # During startup
-         set_global_engine(engine, app_slug="my_app")
-
-         # In routes
-         @app.post("/embed")
-         async def embed(embedding_service = Depends(get_embedding_service_dependency("my_app"))):
-             return await embedding_service.embed_chunks(["Hello world"])
-         ```
-     """
-     return create_embedding_dependency(app_slug, _global_engine)
-
-
- # Alias for backward compatibility
- get_embedding_service_dep = get_embedding_service_dependency
+ __all__ = [
+     "get_embedding_service_for_app",
+ ]
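The global-engine helpers (`set_global_engine`, `create_embedding_dependency`, `get_embedding_service_dependency`, and the `get_embedding_service_dep` alias) are gone in 0.4.x; routes are expected to use the request-scoped dependency from `mdb_engine.dependencies` that the new module docstring points to. A minimal migration sketch, hedged: the 503 guard assumes the dependency can resolve to `None` when embedding is disabled, which this diff does not confirm.

```python
from fastapi import Depends, FastAPI, HTTPException

from mdb_engine.dependencies import get_embedding_service  # new request-scoped dependency

app = FastAPI()

# 0.1.x pattern (removed): set_global_engine(engine, app_slug="my_app") plus
# Depends(get_embedding_service_dependency("my_app")).


@app.post("/embed")
async def embed(embedding_service=Depends(get_embedding_service)):
    # Defensive guard: whether the dependency raises or yields None when
    # embedding is disabled in manifest.json is not shown in this diff.
    if embedding_service is None:
        raise HTTPException(status_code=503, detail="Embedding service not available")
    vectors = await embedding_service.embed(["Hello world"])
    return {"embeddings": vectors}
```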
mdb_engine/embeddings/service.py

@@ -23,7 +23,7 @@ import os
  import time
  from abc import ABC, abstractmethod
  from datetime import datetime
- from typing import Any, Dict, List, Optional, Union
+ from typing import Any
 
  # Optional OpenAI SDK import
  try:
@@ -59,9 +59,7 @@ class BaseEmbeddingProvider(ABC):
      """
 
      @abstractmethod
-     async def embed(
-         self, text: Union[str, List[str]], model: Optional[str] = None
-     ) -> List[List[float]]:
+     async def embed(self, text: str | list[str], model: str | None = None) -> list[list[float]]:
          """
          Generate embeddings for text.
 
@@ -84,7 +82,7 @@ class OpenAIEmbeddingProvider(BaseEmbeddingProvider):
 
      def __init__(
          self,
-         api_key: Optional[str] = None,
+         api_key: str | None = None,
          default_model: str = "text-embedding-3-small",
      ):
          """
@@ -108,9 +106,7 @@ class OpenAIEmbeddingProvider(BaseEmbeddingProvider):
          self.client = AsyncOpenAI(api_key=api_key)
          self.default_model = default_model
 
-     async def embed(
-         self, text: Union[str, List[str]], model: Optional[str] = None
-     ) -> List[List[float]]:
+     async def embed(self, text: str | list[str], model: str | None = None) -> list[list[float]]:
          """Generate embeddings using OpenAI."""
          model = model or self.default_model
 
@@ -134,7 +130,7 @@ class OpenAIEmbeddingProvider(BaseEmbeddingProvider):
              ConnectionError,
              OSError,
          ) as e:
-             logger.error(f"OpenAI embedding failed: {e}")
+             logger.exception(f"OpenAI embedding failed: {e}")
              raise EmbeddingServiceError(f"OpenAI embedding failed: {str(e)}") from e
 
 
@@ -149,9 +145,9 @@ class AzureOpenAIEmbeddingProvider(BaseEmbeddingProvider):
 
      def __init__(
          self,
-         api_key: Optional[str] = None,
-         endpoint: Optional[str] = None,
-         api_version: Optional[str] = None,
+         api_key: str | None = None,
+         endpoint: str | None = None,
+         api_version: str | None = None,
          default_model: str = "text-embedding-3-small",
      ):
          """
@@ -191,9 +187,7 @@
          )
          self.default_model = default_model
 
-     async def embed(
-         self, text: Union[str, List[str]], model: Optional[str] = None
-     ) -> List[List[float]]:
+     async def embed(self, text: str | list[str], model: str | None = None) -> list[list[float]]:
          """Generate embeddings using Azure OpenAI."""
          model = model or self.default_model
 
@@ -217,10 +211,8 @@
              ConnectionError,
              OSError,
          ) as e:
-             logger.error(f"Azure OpenAI embedding failed: {e}")
-             raise EmbeddingServiceError(
-                 f"Azure OpenAI embedding failed: {str(e)}"
-             ) from e
+             logger.exception(f"Azure OpenAI embedding failed: {e}")
+             raise EmbeddingServiceError(f"Azure OpenAI embedding failed: {str(e)}") from e
 
 
  def _detect_provider_from_env() -> str:
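The Azure provider keeps the same constructor shape with modernized annotations. Below is a construction sketch using the parameter names from the signature above; the endpoint, key, and API version values are placeholders, and the exact environment fallbacks used when they are omitted are not shown in this diff.

```python
import asyncio

from mdb_engine.embeddings.service import AzureOpenAIEmbeddingProvider


async def main() -> None:
    # Parameter names come from the 0.4.12 signature; the concrete values
    # here are placeholders, not values taken from the package.
    provider = AzureOpenAIEmbeddingProvider(
        api_key="<azure-openai-key>",
        endpoint="https://<resource>.openai.azure.com",
        api_version="<api-version>",
        default_model="text-embedding-3-small",
    )
    # embed() accepts a single string or a list of strings (str | list[str])
    # and returns a list of embedding vectors (list[list[float]]).
    vectors = await provider.embed(["Hello world"])
    print(len(vectors), len(vectors[0]))


asyncio.run(main())
```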
@@ -257,8 +249,8 @@ class EmbeddingProvider:
 
      def __init__(
          self,
-         embedding_provider: Optional[BaseEmbeddingProvider] = None,
-         config: Optional[Dict[str, Any]] = None,
+         embedding_provider: BaseEmbeddingProvider | None = None,
+         config: dict[str, Any] | None = None,
      ):
          """
          Initialize Embedding Provider.
@@ -281,31 +273,21 @@
          else:
              # Auto-detect provider from environment variables
              provider_type = _detect_provider_from_env()
-             default_model = (config or {}).get(
-                 "default_embedding_model", "text-embedding-3-small"
-             )
+             default_model = (config or {}).get("default_embedding_model", "text-embedding-3-small")
 
              if provider_type == "azure":
-                 self.embedding_provider = AzureOpenAIEmbeddingProvider(
-                     default_model=default_model
-                 )
+                 self.embedding_provider = AzureOpenAIEmbeddingProvider(default_model=default_model)
                  logger.info(
                      f"Auto-detected Azure OpenAI embedding provider (model: {default_model})"
                  )
              else:
-                 self.embedding_provider = OpenAIEmbeddingProvider(
-                     default_model=default_model
-                 )
-                 logger.info(
-                     f"Auto-detected OpenAI embedding provider (model: {default_model})"
-                 )
+                 self.embedding_provider = OpenAIEmbeddingProvider(default_model=default_model)
+                 logger.info(f"Auto-detected OpenAI embedding provider (model: {default_model})")
 
          # Store config for potential future use
          self.config = config or {}
 
-     async def embed(
-         self, text: Union[str, List[str]], model: Optional[str] = None
-     ) -> List[List[float]]:
+     async def embed(self, text: str | list[str], model: str | None = None) -> list[list[float]]:
          """
          Generates vector embeddings for a string or list of strings.
 
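`EmbeddingProvider` still auto-detects its backend: `_detect_provider_from_env()` picks Azure or OpenAI, and `default_embedding_model` is read from the config dict. A minimal sketch of the config-driven path follows; the provider credentials themselves still come from the environment, whose variable names are not part of this diff.

```python
import asyncio

from mdb_engine.embeddings.service import EmbeddingProvider


async def main() -> None:
    # "default_embedding_model" is the config key read in __init__ above; with
    # no explicit provider the backend is chosen by _detect_provider_from_env().
    provider = EmbeddingProvider(config={"default_embedding_model": "text-embedding-3-small"})

    # embed() accepts str | list[str] and returns list[list[float]].
    vectors = await provider.embed("Hello world")
    print(len(vectors), len(vectors[0]))


asyncio.run(main())
```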
@@ -341,7 +323,7 @@
              return vectors
 
          except (AttributeError, TypeError, ValueError, RuntimeError, KeyError) as e:
-             logger.error(f"EMBED_FAILED: {str(e)}")
+             logger.exception(f"EMBED_FAILED: {str(e)}")
              raise EmbeddingServiceError(f"Embedding failed: {str(e)}") from e
 
 
@@ -371,10 +353,10 @@
 
      def __init__(
          self,
-         embedding_provider: Optional[EmbeddingProvider] = None,
+         embedding_provider: EmbeddingProvider | None = None,
          default_max_tokens: int = 1000,
          default_tokenizer_model: str = "gpt-3.5-turbo",
-         config: Optional[Dict[str, Any]] = None,
+         config: dict[str, Any] | None = None,
      ):
          """
          Initialize Embedding Service.
@@ -407,9 +389,7 @@
          self.default_max_tokens = default_max_tokens
          self.default_tokenizer_model = default_tokenizer_model
 
-     def _create_splitter(
-         self, max_tokens: int, tokenizer_model: Optional[str] = None
-     ) -> TextSplitter:
+     def _create_splitter(self, max_tokens: int, tokenizer_model: str | None = None) -> TextSplitter:
          """
          Create a TextSplitter instance.
 
@@ -429,9 +409,9 @@
      async def chunk_text(
          self,
          text_content: str,
-         max_tokens: Optional[int] = None,
-         tokenizer_model: Optional[str] = None,
-     ) -> List[str]:
+         max_tokens: int | None = None,
+         tokenizer_model: str | None = None,
+     ) -> list[str]:
          """
          Split text into semantic chunks.
 
@@ -465,32 +445,39 @@
              logger.error(f"Error chunking text: {e}", exc_info=True)
              raise EmbeddingServiceError(f"Chunking failed: {str(e)}") from e
 
-     async def embed_chunks(
-         self, chunks: List[str], model: Optional[str] = None
-     ) -> List[List[float]]:
+     async def embed(self, text: str | list[str], model: str | None = None) -> list[list[float]]:
          """
-         Generate embeddings for text chunks.
+         Generate embeddings for text or a list of texts.
 
-         Uses the user-provided embedding provider/function.
+         Natural API that works with both single strings and lists.
 
          Args:
-             chunks: List of text chunks to embed
+             text: A single string or list of strings to embed
              model: Optional model identifier (passed to embedding provider)
 
          Returns:
-             List of embedding vectors (each is a list of floats)
+             List of embedding vectors (each is a list of floats).
+             If input was a single string, returns a list containing one vector.
 
          Example:
-             chunks = ["chunk 1", "chunk 2"]
-             vectors = await service.embed_chunks(chunks, model="text-embedding-3-small")
+             # Single string
+             vectors = await service.embed("Hello world", model="text-embedding-3-small")
+             # vectors is [[0.1, 0.2, ...]]
+
+             # List of strings (batch - more efficient)
+             vectors = await service.embed(["chunk 1", "chunk 2"], model="text-embedding-3-small")
+             # vectors is [[0.1, ...], [0.2, ...]]
          """
+         # Normalize to list
+         chunks = [text] if isinstance(text, str) else text
+
          if not chunks:
              return []
 
          try:
              # Use EmbeddingProvider's embed method (handles retries, logging, etc.)
              vectors = await self.embedding_provider.embed(chunks, model=model)
-             logger.info(f"Generated {len(vectors)} embeddings")
+             logger.info(f"Generated {len(vectors)} embedding(s)")
              return vectors
          except (
              AttributeError,
@@ -503,16 +490,36 @@
              logger.error(f"Error generating embeddings: {e}", exc_info=True)
              raise EmbeddingServiceError(f"Embedding generation failed: {str(e)}") from e
 
+     async def embed_chunks(self, chunks: list[str], model: str | None = None) -> list[list[float]]:
+         """
+         Generate embeddings for text chunks (list only).
+
+         DEPRECATED: Use embed() instead, which accepts both strings and lists.
+         This method is kept for backward compatibility.
+
+         Args:
+             chunks: List of text chunks to embed
+             model: Optional model identifier (passed to embedding provider)
+
+         Returns:
+             List of embedding vectors (each is a list of floats)
+
+         Example:
+             chunks = ["chunk 1", "chunk 2"]
+             vectors = await service.embed_chunks(chunks, model="text-embedding-3-small")
+         """
+         return await self.embed(chunks, model=model)
+
      async def process_and_store(
          self,
          text_content: str,
          source_id: str,
          collection: Any,  # MongoDB collection (AppDB Collection or Motor collection)
-         max_tokens: Optional[int] = None,
-         tokenizer_model: Optional[str] = None,
-         embedding_model: Optional[str] = None,
-         metadata: Optional[Dict[str, Any]] = None,
-     ) -> Dict[str, Any]:
+         max_tokens: int | None = None,
+         tokenizer_model: str | None = None,
+         embedding_model: str | None = None,
+         metadata: dict[str, Any] | None = None,
+     ) -> dict[str, Any]:
          """
          Process text and store chunks with embeddings in MongoDB.
@@ -573,7 +580,7 @@
              ConnectionError,
              OSError,
          ) as e:
-             logger.error(f"Failed to generate embeddings for {source_id}: {e}")
+             logger.exception(f"Failed to generate embeddings for {source_id}: {e}")
              raise EmbeddingServiceError(f"Embedding generation failed: {str(e)}") from e
 
          if len(vectors) != len(chunks):
@@ -583,7 +590,7 @@
 
          # Step 3: Prepare documents for insertion
          documents_to_insert = []
-         for i, (chunk_text, vector) in enumerate(zip(chunks, vectors)):
+         for i, (chunk_text, vector) in enumerate(zip(chunks, vectors, strict=False)):
              doc = {
                  "source_id": source_id,
                  "chunk_index": i,
@@ -614,9 +621,7 @@
              result = await collection.insert_many(documents_to_insert)
              inserted_count = len(result.inserted_ids)
 
-             logger.info(
-                 f"Successfully inserted {inserted_count} documents for source: {source_id}"
-             )
+             logger.info(f"Successfully inserted {inserted_count} documents for source: {source_id}")
 
              return {
                  "chunks_created": len(chunks),
@@ -632,18 +637,16 @@
              KeyError,
              ConnectionError,
          ) as e:
-             logger.error(
-                 f"Failed to store documents for {source_id}: {e}", exc_info=True
-             )
+             logger.error(f"Failed to store documents for {source_id}: {e}", exc_info=True)
              raise EmbeddingServiceError(f"Storage failed: {str(e)}") from e
 
      async def process_text(
          self,
          text_content: str,
-         max_tokens: Optional[int] = None,
-         tokenizer_model: Optional[str] = None,
-         embedding_model: Optional[str] = None,
-     ) -> List[Dict[str, Any]]:
+         max_tokens: int | None = None,
+         tokenizer_model: str | None = None,
+         embedding_model: str | None = None,
+     ) -> list[dict[str, Any]]:
          """
          Process text and return chunks with embeddings (without storing).
 
@@ -687,7 +690,7 @@
 
          # Prepare results
          results = []
-         for i, (chunk_text, vector) in enumerate(zip(chunks, vectors)):
+         for i, (chunk_text, vector) in enumerate(zip(chunks, vectors, strict=False)):
              results.append(
                  {
                      "chunk_index": i,
@@ -706,8 +709,8 @@
 
  # Dependency injection helper
  def get_embedding_service(
-     embedding_provider: Optional[BaseEmbeddingProvider] = None,
-     config: Optional[Dict[str, Any]] = None,
+     embedding_provider: BaseEmbeddingProvider | None = None,
+     config: dict[str, Any] | None = None,
  ) -> EmbeddingService:
      """
      Create EmbeddingService instance with auto-detected or provided embedding provider.
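The `get_embedding_service()` factory keeps its behavior apart from the annotation update. For context, a pipeline sketch combining it with `process_and_store()`; the Motor client, database and collection names, and metadata here are illustrative, and only the `chunks_created` key of the returned dict is visible in this diff.

```python
import asyncio

from motor.motor_asyncio import AsyncIOMotorClient

from mdb_engine.embeddings.service import get_embedding_service


async def main() -> None:
    # Factory sketch: with no explicit provider, the backend is auto-detected;
    # "default_embedding_model" is the only config key shown in this diff.
    service = get_embedding_service(config={"default_embedding_model": "text-embedding-3-small"})

    # process_and_store() chunks the text, embeds each chunk, and writes one
    # document per chunk into the given collection (a Motor collection here).
    collection = AsyncIOMotorClient()["demo_db"]["embeddings"]
    result = await service.process_and_store(
        text_content="Some long document text...",
        source_id="doc-001",
        collection=collection,
        metadata={"source": "example"},
    )
    print(result["chunks_created"])


asyncio.run(main())
```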