mdb-engine 0.1.6__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (75) hide show
  1. mdb_engine/README.md +144 -0
  2. mdb_engine/__init__.py +37 -0
  3. mdb_engine/auth/README.md +631 -0
  4. mdb_engine/auth/__init__.py +128 -0
  5. mdb_engine/auth/casbin_factory.py +199 -0
  6. mdb_engine/auth/casbin_models.py +46 -0
  7. mdb_engine/auth/config_defaults.py +71 -0
  8. mdb_engine/auth/config_helpers.py +213 -0
  9. mdb_engine/auth/cookie_utils.py +158 -0
  10. mdb_engine/auth/decorators.py +350 -0
  11. mdb_engine/auth/dependencies.py +747 -0
  12. mdb_engine/auth/helpers.py +64 -0
  13. mdb_engine/auth/integration.py +578 -0
  14. mdb_engine/auth/jwt.py +225 -0
  15. mdb_engine/auth/middleware.py +241 -0
  16. mdb_engine/auth/oso_factory.py +323 -0
  17. mdb_engine/auth/provider.py +570 -0
  18. mdb_engine/auth/restrictions.py +271 -0
  19. mdb_engine/auth/session_manager.py +477 -0
  20. mdb_engine/auth/token_lifecycle.py +213 -0
  21. mdb_engine/auth/token_store.py +289 -0
  22. mdb_engine/auth/users.py +1516 -0
  23. mdb_engine/auth/utils.py +614 -0
  24. mdb_engine/cli/__init__.py +13 -0
  25. mdb_engine/cli/commands/__init__.py +7 -0
  26. mdb_engine/cli/commands/generate.py +105 -0
  27. mdb_engine/cli/commands/migrate.py +83 -0
  28. mdb_engine/cli/commands/show.py +70 -0
  29. mdb_engine/cli/commands/validate.py +63 -0
  30. mdb_engine/cli/main.py +41 -0
  31. mdb_engine/cli/utils.py +92 -0
  32. mdb_engine/config.py +217 -0
  33. mdb_engine/constants.py +160 -0
  34. mdb_engine/core/README.md +542 -0
  35. mdb_engine/core/__init__.py +42 -0
  36. mdb_engine/core/app_registration.py +392 -0
  37. mdb_engine/core/connection.py +243 -0
  38. mdb_engine/core/engine.py +749 -0
  39. mdb_engine/core/index_management.py +162 -0
  40. mdb_engine/core/manifest.py +2793 -0
  41. mdb_engine/core/seeding.py +179 -0
  42. mdb_engine/core/service_initialization.py +355 -0
  43. mdb_engine/core/types.py +413 -0
  44. mdb_engine/database/README.md +522 -0
  45. mdb_engine/database/__init__.py +31 -0
  46. mdb_engine/database/abstraction.py +635 -0
  47. mdb_engine/database/connection.py +387 -0
  48. mdb_engine/database/scoped_wrapper.py +1721 -0
  49. mdb_engine/embeddings/README.md +184 -0
  50. mdb_engine/embeddings/__init__.py +62 -0
  51. mdb_engine/embeddings/dependencies.py +193 -0
  52. mdb_engine/embeddings/service.py +759 -0
  53. mdb_engine/exceptions.py +167 -0
  54. mdb_engine/indexes/README.md +651 -0
  55. mdb_engine/indexes/__init__.py +21 -0
  56. mdb_engine/indexes/helpers.py +145 -0
  57. mdb_engine/indexes/manager.py +895 -0
  58. mdb_engine/memory/README.md +451 -0
  59. mdb_engine/memory/__init__.py +30 -0
  60. mdb_engine/memory/service.py +1285 -0
  61. mdb_engine/observability/README.md +515 -0
  62. mdb_engine/observability/__init__.py +42 -0
  63. mdb_engine/observability/health.py +296 -0
  64. mdb_engine/observability/logging.py +161 -0
  65. mdb_engine/observability/metrics.py +297 -0
  66. mdb_engine/routing/README.md +462 -0
  67. mdb_engine/routing/__init__.py +73 -0
  68. mdb_engine/routing/websockets.py +813 -0
  69. mdb_engine/utils/__init__.py +7 -0
  70. mdb_engine-0.1.6.dist-info/METADATA +213 -0
  71. mdb_engine-0.1.6.dist-info/RECORD +75 -0
  72. mdb_engine-0.1.6.dist-info/WHEEL +5 -0
  73. mdb_engine-0.1.6.dist-info/entry_points.txt +2 -0
  74. mdb_engine-0.1.6.dist-info/licenses/LICENSE +661 -0
  75. mdb_engine-0.1.6.dist-info/top_level.txt +1 -0
@@ -0,0 +1,184 @@
1
+ # Embeddings Service Module
2
+
3
+ Semantic text splitting and embedding generation for MDB_ENGINE applications.
4
+
5
+ ## Features
6
+
7
+ - **Semantic Text Splitting**: Rust-based semantic-text-splitter for intelligent chunking
8
+ - **OpenAI & AzureOpenAI Support**: Auto-detects provider from environment variables
9
+ - **Token-Aware**: Never exceeds model token limits
10
+ - **Batch Processing**: Efficient batch embedding generation
11
+ - **MongoDB Integration**: Built-in support for storing embeddings with metadata
12
+
13
+ ## Installation
14
+
15
+ ```bash
16
+ pip install semantic-text-splitter openai
17
+ ```
18
+
19
+ ## Configuration
20
+
21
+ The embedding service auto-detects the provider from environment variables (same logic as mem0):
22
+
23
+ - **OpenAI**: Requires `OPENAI_API_KEY`
24
+ - **AzureOpenAI**: Requires `AZURE_OPENAI_API_KEY` and `AZURE_OPENAI_ENDPOINT`
25
+
26
+ Enable the embedding service in your app's `manifest.json`:
27
+
28
+ ```json
29
+ {
30
+ "embedding_config": {
31
+ "enabled": true,
32
+ "default_embedding_model": "text-embedding-3-small",
33
+ "max_tokens_per_chunk": 1000,
34
+ "tokenizer_model": "gpt-3.5-turbo"
35
+ }
36
+ }
37
+ ```
38
+
39
+ ## Usage
40
+
41
+ ### 1. Basic Usage (Auto-Detection)
42
+
43
+ ```python
44
+ from mdb_engine.embeddings import EmbeddingService
45
+
46
+ # Initialize - auto-detects OpenAI or AzureOpenAI from environment variables
47
+ embedding_service = EmbeddingService()
48
+
49
+ # Chunk text
50
+ chunks = await embedding_service.chunk_text(
51
+ text_content="Your long document here...",
52
+ max_tokens=1000
53
+ )
54
+
55
+ # Generate embeddings
56
+ vectors = await embedding_service.embed_chunks(chunks, model="text-embedding-3-small")
57
+ ```
58
+
59
+ ### 2. Process and Store in MongoDB
60
+
61
+ ```python
62
+ from mdb_engine.embeddings import EmbeddingService
63
+
64
+ embedding_service = EmbeddingService()
65
+
66
+ # Process text and store in MongoDB
67
+ result = await embedding_service.process_and_store(
68
+ text_content="Your long document here...",
69
+ source_id="doc_101",
70
+ collection=db.knowledge_base,
71
+ max_tokens=1000,
72
+ metadata={"source": "document.pdf", "page": 1}
73
+ )
74
+
75
+ print(f"Created {result['chunks_created']} chunks")
76
+ ```
77
+
78
+ ### 3. Explicit Provider
79
+
80
+ ```python
81
+ from mdb_engine.embeddings import EmbeddingService, OpenAIEmbeddingProvider, EmbeddingProvider
82
+
83
+ # Use OpenAI explicitly
84
+ openai_provider = OpenAIEmbeddingProvider(default_model="text-embedding-3-small")
85
+ provider = EmbeddingProvider(embedding_provider=openai_provider)
86
+ embedding_service = EmbeddingService(embedding_provider=provider)
87
+ ```
88
+
89
+ ### 4. In FastAPI Routes
90
+
91
+ ```python
92
+ from fastapi import FastAPI, Depends
93
+ from mdb_engine.embeddings.dependencies import get_embedding_service_dependency
94
+ from mdb_engine.embeddings import EmbeddingService
95
+
96
+ app = FastAPI()
97
+
98
+ # Set global engine during startup
99
+ from mdb_engine.embeddings.dependencies import set_global_engine
100
+ set_global_engine(engine, app_slug="my_app")
101
+
102
+ @app.post("/embed")
103
+ async def embed_endpoint(
104
+ embedding_service: EmbeddingService = Depends(get_embedding_service_dependency("my_app"))
105
+ ):
106
+ embeddings = await embedding_service.embed_chunks(["Hello world"])
107
+ return {"embeddings": embeddings}
108
+ ```
109
+
110
+ ## Environment Variables
111
+
112
+ ### OpenAI
113
+ ```bash
114
+ export OPENAI_API_KEY="sk-..."
115
+ ```
116
+
117
+ ### AzureOpenAI
118
+ ```bash
119
+ export AZURE_OPENAI_API_KEY="..."
120
+ export AZURE_OPENAI_ENDPOINT="https://your-resource.openai.azure.com/"
121
+ export AZURE_OPENAI_API_VERSION="2024-02-15-preview" # Optional
122
+ ```
123
+
124
+ ## EmbeddingService Methods
125
+
126
+ ### `chunk_text(text_content, max_tokens=None, tokenizer_model=None)`
127
+
128
+ Split text into semantic chunks.
129
+
130
+ ```python
131
+ chunks = await service.chunk_text("Long document...", max_tokens=1000)
132
+ ```
133
+
134
+ ### `embed_chunks(chunks, model=None)`
135
+
136
+ Generate embeddings for text chunks.
137
+
138
+ ```python
139
+ vectors = await service.embed_chunks(chunks, model="text-embedding-3-small")
140
+ ```
141
+
142
+ ### `process_and_store(text_content, source_id, collection, ...)`
143
+
144
+ Process text and store chunks with embeddings in MongoDB.
145
+
146
+ ```python
147
+ result = await service.process_and_store(
148
+ text_content="Long document...",
149
+ source_id="doc_101",
150
+ collection=db.knowledge_base
151
+ )
152
+ ```
153
+
154
+ ### `process_text(text_content, max_tokens=None, ...)`
155
+
156
+ Process text and return chunks with embeddings (without storing).
157
+
158
+ ```python
159
+ results = await service.process_text("Long document...")
160
+ ```
161
+
162
+ ## Supported Models
163
+
164
+ - **OpenAI**: `text-embedding-3-small`, `text-embedding-3-large`, `text-embedding-ada-002`
165
+ - **AzureOpenAI**: Any Azure OpenAI embedding deployment (e.g., `text-embedding-3-small`)
166
+
167
+ ## Error Handling
168
+
169
+ All embedding operations raise `EmbeddingServiceError` on failure:
170
+
171
+ ```python
172
+ from mdb_engine.embeddings import EmbeddingServiceError
173
+
174
+ try:
175
+ vectors = await service.embed_chunks(["Hello"])
176
+ except EmbeddingServiceError as e:
177
+ print(f"Embedding failed: {e}")
178
+ ```
179
+
180
+ ## Notes
181
+
182
+ - The embedding service uses the same auto-detection logic as mem0 for consistency
183
+ - LLM functionality (chat completions, structured extraction) is not provided by this module; implement it directly in your application or example code using the OpenAI SDK or your preferred provider
184
+ - For memory functionality, use `mdb_engine.memory.Mem0MemoryService` which handles embeddings and LLM via environment variables
@@ -0,0 +1,62 @@
1
+ """
2
+ Embeddings Service Module
3
+
4
+ Provides EmbeddingService for semantic text splitting and embedding generation.
5
+ Examples should implement their own LLM clients directly using the OpenAI SDK.
6
+
7
+ For memory functionality, use mdb_engine.memory.Mem0MemoryService which
8
+ handles embeddings and LLM via environment variables (.env).
9
+
10
+ Example LLM implementation:
11
+ from openai import AzureOpenAI
12
+ from dotenv import load_dotenv
13
+ import os
14
+
15
+ load_dotenv()
16
+
17
+ client = AzureOpenAI(
18
+ api_version=os.getenv("AZURE_OPENAI_API_VERSION", "2024-02-01"),
19
+ azure_endpoint=os.getenv("AZURE_OPENAI_ENDPOINT"),
20
+ api_key=os.getenv("AZURE_OPENAI_API_KEY")
21
+ )
22
+
23
+ completion = client.chat.completions.create(
24
+ model=os.getenv("AZURE_OPENAI_DEPLOYMENT_NAME"),
25
+ messages=[...]
26
+ )
27
+
28
+ Example EmbeddingService usage:
29
+ from mdb_engine.embeddings import EmbeddingService, get_embedding_service
30
+
31
+ # In FastAPI route
32
+ @app.post("/embed")
33
+ async def embed_text(embedding_service: EmbeddingService = Depends(get_embedding_service)):
34
+ embeddings = await embedding_service.embed_chunks(["Hello world"])
35
+ return {"embeddings": embeddings}
36
+ """
37
+
38
+ from .dependencies import (create_embedding_dependency,
39
+ get_embedding_service_dep,
40
+ get_embedding_service_dependency,
41
+ get_embedding_service_for_app, get_global_engine,
42
+ set_global_engine)
43
+ from .service import (AzureOpenAIEmbeddingProvider, BaseEmbeddingProvider,
44
+ EmbeddingProvider, EmbeddingService,
45
+ EmbeddingServiceError, OpenAIEmbeddingProvider,
46
+ get_embedding_service)
47
+
48
# Public API of the embeddings package. The order is kept stable because it
# is what ``from mdb_engine.embeddings import *`` exposes.
__all__ = [
    # Service, its error type, and the provider classes (from .service)
    "EmbeddingService",
    "EmbeddingServiceError",
    "BaseEmbeddingProvider",
    "OpenAIEmbeddingProvider",
    "AzureOpenAIEmbeddingProvider",
    "EmbeddingProvider",
    "get_embedding_service",
    # Dependency-injection helpers (from .dependencies)
    "get_embedding_service_for_app",
    "create_embedding_dependency",
    "set_global_engine",
    "get_global_engine",
    "get_embedding_service_dependency",
    "get_embedding_service_dep",
]
@@ -0,0 +1,193 @@
1
+ """
2
+ Embedding Service Dependency Injection for FastAPI
3
+
4
+ This module provides FastAPI dependency functions to inject embedding services
5
+ into route handlers. The embedding service is automatically initialized from
6
+ the app's manifest.json configuration.
7
+ """
8
+
9
+ from typing import Any, Optional
10
+
11
# Optional FastAPI import (only needed if FastAPI is available)
try:
    from fastapi import Depends, HTTPException

    FASTAPI_AVAILABLE = True
except ImportError:
    FASTAPI_AVAILABLE = False

    # Stubs for when FastAPI is not available. They keep this module
    # importable; they do not provide any dependency-injection behaviour.
    def Depends(*args, **kwargs):
        """Stand-in for ``fastapi.Depends``; ignores its arguments and returns None."""
        return None

    class HTTPException(Exception):
        """
        Stand-in for ``fastapi.HTTPException``.

        Accepts the same ``status_code`` / ``detail`` / ``headers`` arguments
        (positionally or by keyword) so code written against FastAPI's
        signature does not fail with ``TypeError`` when FastAPI is missing.
        """

        def __init__(self, status_code=None, detail=None, headers=None):
            # Forward only the values that were supplied, so positional calls
            # produce the same ``args`` / ``str()`` as a plain Exception would.
            supplied = (status_code, detail, headers)
            super().__init__(*(value for value in supplied if value is not None))
            self.status_code = status_code
            self.detail = detail
            self.headers = headers
25
+
26
+
27
+ from .service import EmbeddingService, get_embedding_service
28
+
29
# Module-level registry for apps that do not pass an engine explicitly.
# Holds the engine most recently given to set_global_engine(), or None.
_global_engine: Optional[Any] = None

# App slug recorded together with the engine by set_global_engine().
_global_app_slug: Optional[str] = None
32
+
33
+
34
def set_global_engine(engine: Any, app_slug: Optional[str] = None) -> None:
    """
    Register the engine used as the fallback for embedding dependency injection.

    Intended to be called once during application startup when a single
    engine instance is shared. Both registry values are overwritten on every
    call, so calling without ``app_slug`` resets the stored slug to None.

    Args:
        engine: MongoDBEngine instance to store in the module-level registry
        app_slug: Optional app slug stored alongside the engine
    """
    global _global_engine, _global_app_slug
    _global_engine, _global_app_slug = engine, app_slug
48
+
49
+
50
def get_global_engine() -> Optional[Any]:
    """
    Return the engine currently held in the module-level registry.

    Returns:
        The registered MongoDBEngine instance, or None if none is registered
    """
    registered = _global_engine
    return registered
58
+
59
+
60
def get_embedding_service_for_app(
    app_slug: str, engine: Optional[Any] = None
) -> Optional[EmbeddingService]:
    """
    Get embedding service for a specific app.

    Looks the app up on the engine via ``engine.get_app(app_slug)`` and builds
    an EmbeddingService from the app's ``embedding_config`` section. It can be
    wrapped for use with FastAPI's Depends() to inject the service into route
    handlers.

    Args:
        app_slug: App slug (typically extracted from route context)
        engine: MongoDBEngine instance (optional; when omitted, the engine
            registered in the module-level registry is used)

    Returns:
        EmbeddingService instance, or None when no engine is available, the
        engine has no config for ``app_slug``, or ``embedding_config.enabled``
        is falsy

    Example:
        ```python
        from fastapi import Depends, HTTPException
        from mdb_engine.embeddings.dependencies import get_embedding_service_for_app

        @app.post("/embed")
        async def embed_endpoint(
            embedding_service = Depends(lambda: get_embedding_service_for_app("my_app"))
        ):
            if not embedding_service:
                raise HTTPException(503, "Embedding service not available")
            embeddings = await embedding_service.embed_chunks(["Hello world"])
            return {"embeddings": embeddings}
        ```
    """
    # Resolve the fallback engine at call time, not at definition time.
    if engine is None:
        engine = _global_engine
    if engine is None:
        return None

    # Get app config to extract embedding_config
    app_config = engine.get_app(app_slug)
    if not app_config:
        return None

    # The key may be present with an explicit null value; treat that the same
    # as a missing section instead of failing on ``None.get(...)``.
    embedding_config = app_config.get("embedding_config") or {}

    # NOTE(review): a missing "enabled" key counts as enabled here, although
    # the 503 message elsewhere tells users to set it to true - confirm intent.
    if not embedding_config.get("enabled", True):
        return None

    # Create embedding service with config
    return get_embedding_service(config=embedding_config)
109
+
110
+
111
def create_embedding_dependency(app_slug: str, engine: Optional[Any] = None):
    """
    Build a zero-argument dependency callable for one app's embedding service.

    The returned callable can be handed to Depends() so the embedding service
    is injected into route handlers.

    Args:
        app_slug: App slug
        engine: MongoDBEngine instance (optional)

    Returns:
        Dependency function that returns the EmbeddingService. When no service
        is available it raises HTTPException (status 503) if FastAPI is
        installed, otherwise RuntimeError.

    Example:
        ```python
        from fastapi import Depends
        from mdb_engine.embeddings.dependencies import create_embedding_dependency

        embedding_dep = create_embedding_dependency("my_app", engine)

        @app.post("/embed")
        async def embed_endpoint(embedding_service = Depends(embedding_dep)):
            embeddings = await embedding_service.embed_chunks(["Hello world"])
            return {"embeddings": embeddings}
        ```
    """

    def _get_embedding_service() -> EmbeddingService:
        service = get_embedding_service_for_app(app_slug, engine)
        if service is not None:
            return service

        # Without FastAPI there is no HTTP layer to report to.
        if not FASTAPI_AVAILABLE:
            raise RuntimeError(
                f"Embedding service not available for app '{app_slug}'"
            )

        message = (
            f"Embedding service not available for app '{app_slug}'. "
            "Ensure 'embedding_config.enabled' is true in manifest.json and "
            "embedding dependencies are installed."
        )
        raise HTTPException(status_code=503, detail=message)

    return _get_embedding_service
156
+
157
+
158
def get_embedding_service_dependency(app_slug: str):
    """
    Get embedding service dependency using global engine.

    This is a convenience function that uses the global engine registry.
    Set the engine with set_global_engine() during app startup. The registry
    is read each time the dependency runs, not when this function is called,
    so the dependency may be created before the engine is registered and
    always sees the most recently registered engine.

    Args:
        app_slug: App slug

    Returns:
        Dependency function for FastAPI Depends()

    Example:
        ```python
        from fastapi import FastAPI, Depends
        from mdb_engine.embeddings.dependencies import (
            set_global_engine, get_embedding_service_dependency
        )

        app = FastAPI()

        # During startup
        set_global_engine(engine, app_slug="my_app")

        # In routes
        @app.post("/embed")
        async def embed(embedding_service = Depends(get_embedding_service_dependency("my_app"))):
            return await embedding_service.embed_chunks(["Hello world"])
        ```
    """
    # Deliberately pass no engine. Snapshotting _global_engine here would pin
    # the dependency to whatever engine was registered at route-definition
    # time; with no engine, get_embedding_service_for_app() falls back to the
    # registry on every call.
    return create_embedding_dependency(app_slug)


# Alias for backward compatibility
get_embedding_service_dep = get_embedding_service_dependency