django-cfg 1.1.82__py3-none-any.whl → 1.2.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (244) hide show
  1. django_cfg/__init__.py +20 -448
  2. django_cfg/apps/accounts/README.md +3 -3
  3. django_cfg/apps/accounts/admin/__init__.py +0 -2
  4. django_cfg/apps/accounts/admin/activity.py +2 -9
  5. django_cfg/apps/accounts/admin/filters.py +0 -42
  6. django_cfg/apps/accounts/admin/inlines.py +8 -8
  7. django_cfg/apps/accounts/admin/otp.py +5 -5
  8. django_cfg/apps/accounts/admin/registration_source.py +1 -8
  9. django_cfg/apps/accounts/admin/user.py +12 -20
  10. django_cfg/apps/accounts/managers/user_manager.py +2 -129
  11. django_cfg/apps/accounts/migrations/0006_remove_twilioresponse_otp_secret_and_more.py +46 -0
  12. django_cfg/apps/accounts/models.py +3 -123
  13. django_cfg/apps/accounts/serializers/otp.py +40 -44
  14. django_cfg/apps/accounts/serializers/profile.py +0 -2
  15. django_cfg/apps/accounts/services/otp_service.py +98 -186
  16. django_cfg/apps/accounts/signals.py +25 -15
  17. django_cfg/apps/accounts/utils/auth_email_service.py +84 -0
  18. django_cfg/apps/accounts/views/otp.py +35 -36
  19. django_cfg/apps/agents/README.md +129 -0
  20. django_cfg/apps/agents/__init__.py +68 -0
  21. django_cfg/apps/agents/admin/__init__.py +17 -0
  22. django_cfg/apps/agents/admin/execution_admin.py +460 -0
  23. django_cfg/apps/agents/admin/registry_admin.py +360 -0
  24. django_cfg/apps/agents/admin/toolsets_admin.py +482 -0
  25. django_cfg/apps/agents/apps.py +29 -0
  26. django_cfg/apps/agents/core/__init__.py +20 -0
  27. django_cfg/apps/agents/core/agent.py +281 -0
  28. django_cfg/apps/agents/core/dependencies.py +154 -0
  29. django_cfg/apps/agents/core/exceptions.py +66 -0
  30. django_cfg/apps/agents/core/models.py +106 -0
  31. django_cfg/apps/agents/core/orchestrator.py +391 -0
  32. django_cfg/apps/agents/examples/__init__.py +3 -0
  33. django_cfg/apps/agents/examples/simple_example.py +161 -0
  34. django_cfg/apps/agents/integration/__init__.py +14 -0
  35. django_cfg/apps/agents/integration/middleware.py +80 -0
  36. django_cfg/apps/agents/integration/registry.py +345 -0
  37. django_cfg/apps/agents/integration/signals.py +50 -0
  38. django_cfg/apps/agents/management/__init__.py +3 -0
  39. django_cfg/apps/agents/management/commands/__init__.py +3 -0
  40. django_cfg/apps/agents/management/commands/create_agent.py +365 -0
  41. django_cfg/apps/agents/management/commands/orchestrator_status.py +191 -0
  42. django_cfg/apps/agents/managers/__init__.py +23 -0
  43. django_cfg/apps/agents/managers/execution.py +236 -0
  44. django_cfg/apps/agents/managers/registry.py +254 -0
  45. django_cfg/apps/agents/managers/toolsets.py +496 -0
  46. django_cfg/apps/agents/migrations/0001_initial.py +286 -0
  47. django_cfg/apps/agents/migrations/__init__.py +5 -0
  48. django_cfg/apps/agents/models/__init__.py +15 -0
  49. django_cfg/apps/agents/models/execution.py +215 -0
  50. django_cfg/apps/agents/models/registry.py +220 -0
  51. django_cfg/apps/agents/models/toolsets.py +305 -0
  52. django_cfg/apps/agents/patterns/__init__.py +24 -0
  53. django_cfg/apps/agents/patterns/content_agents.py +234 -0
  54. django_cfg/apps/agents/toolsets/__init__.py +15 -0
  55. django_cfg/apps/agents/toolsets/cache_toolset.py +285 -0
  56. django_cfg/apps/agents/toolsets/django_toolset.py +220 -0
  57. django_cfg/apps/agents/toolsets/file_toolset.py +324 -0
  58. django_cfg/apps/agents/toolsets/orm_toolset.py +319 -0
  59. django_cfg/apps/agents/urls.py +46 -0
  60. django_cfg/apps/knowbase/README.md +150 -0
  61. django_cfg/apps/knowbase/__init__.py +27 -0
  62. django_cfg/apps/knowbase/admin/__init__.py +23 -0
  63. django_cfg/apps/knowbase/admin/archive_admin.py +857 -0
  64. django_cfg/apps/knowbase/admin/chat_admin.py +386 -0
  65. django_cfg/apps/knowbase/admin/document_admin.py +650 -0
  66. django_cfg/apps/knowbase/admin/external_data_admin.py +685 -0
  67. django_cfg/apps/knowbase/apps.py +81 -0
  68. django_cfg/apps/knowbase/config/README.md +176 -0
  69. django_cfg/apps/knowbase/config/__init__.py +51 -0
  70. django_cfg/apps/knowbase/config/constance_fields.py +186 -0
  71. django_cfg/apps/knowbase/config/constance_settings.py +200 -0
  72. django_cfg/apps/knowbase/config/settings.py +450 -0
  73. django_cfg/apps/knowbase/examples/__init__.py +3 -0
  74. django_cfg/apps/knowbase/examples/external_data_usage.py +191 -0
  75. django_cfg/apps/knowbase/management/__init__.py +0 -0
  76. django_cfg/apps/knowbase/management/commands/__init__.py +0 -0
  77. django_cfg/apps/knowbase/management/commands/knowbase_stats.py +158 -0
  78. django_cfg/apps/knowbase/management/commands/setup_knowbase.py +59 -0
  79. django_cfg/apps/knowbase/managers/__init__.py +22 -0
  80. django_cfg/apps/knowbase/managers/archive.py +426 -0
  81. django_cfg/apps/knowbase/managers/base.py +32 -0
  82. django_cfg/apps/knowbase/managers/chat.py +141 -0
  83. django_cfg/apps/knowbase/managers/document.py +203 -0
  84. django_cfg/apps/knowbase/managers/external_data.py +471 -0
  85. django_cfg/apps/knowbase/migrations/0001_initial.py +427 -0
  86. django_cfg/apps/knowbase/migrations/0002_archiveitem_archiveitemchunk_documentarchive_and_more.py +434 -0
  87. django_cfg/apps/knowbase/migrations/__init__.py +5 -0
  88. django_cfg/apps/knowbase/mixins/__init__.py +15 -0
  89. django_cfg/apps/knowbase/mixins/config.py +108 -0
  90. django_cfg/apps/knowbase/mixins/creator.py +81 -0
  91. django_cfg/apps/knowbase/mixins/examples/vehicle_model_example.py +199 -0
  92. django_cfg/apps/knowbase/mixins/external_data_mixin.py +813 -0
  93. django_cfg/apps/knowbase/mixins/service.py +362 -0
  94. django_cfg/apps/knowbase/models/__init__.py +41 -0
  95. django_cfg/apps/knowbase/models/archive.py +599 -0
  96. django_cfg/apps/knowbase/models/base.py +58 -0
  97. django_cfg/apps/knowbase/models/chat.py +157 -0
  98. django_cfg/apps/knowbase/models/document.py +267 -0
  99. django_cfg/apps/knowbase/models/external_data.py +376 -0
  100. django_cfg/apps/knowbase/serializers/__init__.py +68 -0
  101. django_cfg/apps/knowbase/serializers/archive_serializers.py +386 -0
  102. django_cfg/apps/knowbase/serializers/chat_serializers.py +137 -0
  103. django_cfg/apps/knowbase/serializers/document_serializers.py +94 -0
  104. django_cfg/apps/knowbase/serializers/external_data_serializers.py +256 -0
  105. django_cfg/apps/knowbase/serializers/public_serializers.py +74 -0
  106. django_cfg/apps/knowbase/services/__init__.py +40 -0
  107. django_cfg/apps/knowbase/services/archive/__init__.py +42 -0
  108. django_cfg/apps/knowbase/services/archive/archive_service.py +541 -0
  109. django_cfg/apps/knowbase/services/archive/chunking_service.py +791 -0
  110. django_cfg/apps/knowbase/services/archive/exceptions.py +52 -0
  111. django_cfg/apps/knowbase/services/archive/extraction_service.py +508 -0
  112. django_cfg/apps/knowbase/services/archive/vectorization_service.py +362 -0
  113. django_cfg/apps/knowbase/services/base.py +53 -0
  114. django_cfg/apps/knowbase/services/chat_service.py +239 -0
  115. django_cfg/apps/knowbase/services/document_service.py +144 -0
  116. django_cfg/apps/knowbase/services/embedding/__init__.py +43 -0
  117. django_cfg/apps/knowbase/services/embedding/async_processor.py +244 -0
  118. django_cfg/apps/knowbase/services/embedding/batch_processor.py +250 -0
  119. django_cfg/apps/knowbase/services/embedding/batch_result.py +61 -0
  120. django_cfg/apps/knowbase/services/embedding/models.py +229 -0
  121. django_cfg/apps/knowbase/services/embedding/processors.py +148 -0
  122. django_cfg/apps/knowbase/services/embedding/utils.py +176 -0
  123. django_cfg/apps/knowbase/services/prompt_builder.py +191 -0
  124. django_cfg/apps/knowbase/services/search_service.py +293 -0
  125. django_cfg/apps/knowbase/signals/__init__.py +21 -0
  126. django_cfg/apps/knowbase/signals/archive_signals.py +211 -0
  127. django_cfg/apps/knowbase/signals/chat_signals.py +37 -0
  128. django_cfg/apps/knowbase/signals/document_signals.py +143 -0
  129. django_cfg/apps/knowbase/signals/external_data_signals.py +157 -0
  130. django_cfg/apps/knowbase/tasks/__init__.py +39 -0
  131. django_cfg/apps/knowbase/tasks/archive_tasks.py +316 -0
  132. django_cfg/apps/knowbase/tasks/document_processing.py +341 -0
  133. django_cfg/apps/knowbase/tasks/external_data_tasks.py +341 -0
  134. django_cfg/apps/knowbase/tasks/maintenance.py +195 -0
  135. django_cfg/apps/knowbase/urls.py +43 -0
  136. django_cfg/apps/knowbase/utils/__init__.py +12 -0
  137. django_cfg/apps/knowbase/utils/chunk_settings.py +261 -0
  138. django_cfg/apps/knowbase/utils/text_processing.py +375 -0
  139. django_cfg/apps/knowbase/utils/validation.py +99 -0
  140. django_cfg/apps/knowbase/views/__init__.py +28 -0
  141. django_cfg/apps/knowbase/views/archive_views.py +469 -0
  142. django_cfg/apps/knowbase/views/base.py +49 -0
  143. django_cfg/apps/knowbase/views/chat_views.py +181 -0
  144. django_cfg/apps/knowbase/views/document_views.py +183 -0
  145. django_cfg/apps/knowbase/views/public_views.py +129 -0
  146. django_cfg/apps/leads/admin.py +70 -0
  147. django_cfg/apps/newsletter/admin.py +234 -0
  148. django_cfg/apps/newsletter/admin_filters.py +124 -0
  149. django_cfg/apps/support/admin.py +196 -0
  150. django_cfg/apps/support/admin_filters.py +71 -0
  151. django_cfg/apps/support/templates/support/chat/ticket_chat.html +1 -1
  152. django_cfg/apps/urls.py +5 -4
  153. django_cfg/cli/README.md +1 -1
  154. django_cfg/cli/commands/create_project.py +2 -2
  155. django_cfg/cli/commands/info.py +1 -1
  156. django_cfg/config.py +44 -0
  157. django_cfg/core/config.py +29 -82
  158. django_cfg/core/environment.py +1 -1
  159. django_cfg/core/generation.py +19 -107
  160. django_cfg/{integration.py → core/integration.py} +18 -16
  161. django_cfg/core/validation.py +1 -1
  162. django_cfg/management/__init__.py +1 -1
  163. django_cfg/management/commands/__init__.py +1 -1
  164. django_cfg/management/commands/auto_generate.py +482 -0
  165. django_cfg/management/commands/migrator.py +19 -101
  166. django_cfg/management/commands/test_email.py +1 -1
  167. django_cfg/middleware/README.md +0 -158
  168. django_cfg/middleware/__init__.py +0 -2
  169. django_cfg/middleware/user_activity.py +3 -3
  170. django_cfg/models/api.py +145 -0
  171. django_cfg/models/base.py +287 -0
  172. django_cfg/models/cache.py +4 -4
  173. django_cfg/models/constance.py +25 -88
  174. django_cfg/models/database.py +9 -9
  175. django_cfg/models/drf.py +3 -36
  176. django_cfg/models/email.py +163 -0
  177. django_cfg/models/environment.py +276 -0
  178. django_cfg/models/limits.py +1 -1
  179. django_cfg/models/logging.py +366 -0
  180. django_cfg/models/revolution.py +41 -2
  181. django_cfg/models/security.py +125 -0
  182. django_cfg/models/services.py +1 -1
  183. django_cfg/modules/__init__.py +2 -56
  184. django_cfg/modules/base.py +78 -52
  185. django_cfg/modules/django_currency/service.py +2 -2
  186. django_cfg/modules/django_email.py +2 -2
  187. django_cfg/modules/django_health.py +267 -0
  188. django_cfg/modules/django_llm/llm/client.py +91 -19
  189. django_cfg/modules/django_llm/translator/translator.py +2 -2
  190. django_cfg/modules/django_logger.py +2 -2
  191. django_cfg/modules/django_ngrok.py +2 -2
  192. django_cfg/modules/django_tasks.py +68 -3
  193. django_cfg/modules/django_telegram.py +3 -3
  194. django_cfg/modules/django_twilio/sendgrid_service.py +2 -2
  195. django_cfg/modules/django_twilio/service.py +2 -2
  196. django_cfg/modules/django_twilio/simple_service.py +2 -2
  197. django_cfg/modules/django_twilio/twilio_service.py +2 -2
  198. django_cfg/modules/django_unfold/__init__.py +69 -0
  199. django_cfg/modules/{unfold → django_unfold}/callbacks.py +23 -22
  200. django_cfg/modules/django_unfold/dashboard.py +278 -0
  201. django_cfg/modules/django_unfold/icons/README.md +145 -0
  202. django_cfg/modules/django_unfold/icons/__init__.py +12 -0
  203. django_cfg/modules/django_unfold/icons/constants.py +2851 -0
  204. django_cfg/modules/django_unfold/icons/generate_icons.py +486 -0
  205. django_cfg/modules/django_unfold/models/__init__.py +42 -0
  206. django_cfg/modules/django_unfold/models/config.py +601 -0
  207. django_cfg/modules/django_unfold/models/dashboard.py +206 -0
  208. django_cfg/modules/django_unfold/models/dropdown.py +40 -0
  209. django_cfg/modules/django_unfold/models/navigation.py +73 -0
  210. django_cfg/modules/django_unfold/models/tabs.py +25 -0
  211. django_cfg/modules/{unfold → django_unfold}/system_monitor.py +2 -2
  212. django_cfg/modules/django_unfold/utils.py +140 -0
  213. django_cfg/registry/__init__.py +23 -0
  214. django_cfg/registry/core.py +61 -0
  215. django_cfg/registry/exceptions.py +11 -0
  216. django_cfg/registry/modules.py +12 -0
  217. django_cfg/registry/services.py +26 -0
  218. django_cfg/registry/third_party.py +52 -0
  219. django_cfg/routing/__init__.py +19 -0
  220. django_cfg/routing/callbacks.py +198 -0
  221. django_cfg/routing/routers.py +48 -0
  222. django_cfg/templates/admin/layouts/dashboard_with_tabs.html +8 -9
  223. django_cfg/templatetags/__init__.py +0 -0
  224. django_cfg/templatetags/django_cfg.py +33 -0
  225. django_cfg/urls.py +33 -0
  226. django_cfg/utils/path_resolution.py +1 -1
  227. django_cfg/utils/smart_defaults.py +7 -61
  228. django_cfg/utils/toolkit.py +663 -0
  229. {django_cfg-1.1.82.dist-info → django_cfg-1.2.1.dist-info}/METADATA +83 -86
  230. django_cfg-1.2.1.dist-info/RECORD +441 -0
  231. django_cfg/archive/django_sample.zip +0 -0
  232. django_cfg/models/unfold.py +0 -271
  233. django_cfg/modules/unfold/__init__.py +0 -29
  234. django_cfg/modules/unfold/dashboard.py +0 -318
  235. django_cfg/pyproject.toml +0 -370
  236. django_cfg/routers.py +0 -83
  237. django_cfg-1.1.82.dist-info/RECORD +0 -278
  238. /django_cfg/{exceptions.py → core/exceptions.py} +0 -0
  239. /django_cfg/modules/{unfold → django_unfold}/models.py +0 -0
  240. /django_cfg/modules/{unfold → django_unfold}/tailwind.py +0 -0
  241. /django_cfg/{version_check.py → utils/version_check.py} +0 -0
  242. {django_cfg-1.1.82.dist-info → django_cfg-1.2.1.dist-info}/WHEEL +0 -0
  243. {django_cfg-1.1.82.dist-info → django_cfg-1.2.1.dist-info}/entry_points.txt +0 -0
  244. {django_cfg-1.1.82.dist-info → django_cfg-1.2.1.dist-info}/licenses/LICENSE +0 -0
@@ -0,0 +1,144 @@
1
+ """
2
+ Document management service.
3
+ """
4
+
5
+ from typing import List, Optional, Dict, Any
6
+ from django.db import transaction, models
7
+ from django.utils import timezone
8
+ from ..models import Document, DocumentChunk, ProcessingStatus
9
+ from .base import BaseService
10
+
11
+
12
class DocumentService(BaseService):
    """Service for document management and processing.

    Every query in this class is filtered by ``self.user``, so a caller can
    only see or modify documents owned by that user. ``self.user`` and
    ``self._generate_content_hash`` are not defined in this class; they are
    presumably provided by ``BaseService`` (TODO confirm).
    """

    def create_document(
        self,
        title: str,
        content: str,
        file_type: str = "text/plain",
        metadata: Optional[Dict[str, Any]] = None
    ) -> Document:
        """Create a document in the ``PENDING`` processing state.

        Args:
            title: Document title.
            content: Document text; hashed for duplicate detection and
                measured in UTF-8 bytes for ``file_size``.
            file_type: Stored as-is on the document.
            metadata: Optional metadata; an empty dict is stored when omitted.

        Returns:
            The newly created ``Document``.

        Raises:
            ValueError: If the user already owns a document with the same
                content hash.
        """
        # Hash of the content is the duplicate-detection key.
        content_hash = self._generate_content_hash(content)

        # NOTE(review): check-then-create is not atomic; two concurrent calls
        # with identical content can both pass this check unless the model
        # enforces uniqueness on (user, content_hash) -- confirm.
        existing = Document.objects.filter(
            user=self.user,
            content_hash=content_hash
        ).first()
        if existing:
            raise ValueError(f"Document with same content already exists: {existing.title}")

        # Per the original author's note, async processing is expected to be
        # triggered by a post_save signal (handler not visible here).
        return Document.objects.create(
            user=self.user,
            title=title,
            content=content,
            content_hash=content_hash,
            file_type=file_type,
            file_size=len(content.encode('utf-8')),
            metadata=metadata or {},
            processing_status=ProcessingStatus.PENDING
        )

    def get_document(self, document_id: str) -> Optional[Document]:
        """Return the user's document with this id, or ``None`` if not found."""
        try:
            return Document.objects.get(
                id=document_id,
                user=self.user
            )
        except Document.DoesNotExist:
            return None

    def get_user_documents(self, status: Optional[str] = None):
        """Return a queryset of the user's documents, newest first.

        Args:
            status: When truthy, restrict to this ``processing_status``.
        """
        queryset = Document.objects.filter(user=self.user)
        if status:
            queryset = queryset.filter(processing_status=status)
        return queryset.order_by('-created_at')

    def list_documents(
        self,
        status: Optional[str] = None,
        limit: int = 20,
        offset: int = 0
    ) -> List[Document]:
        """Return one page of the user's documents as a list.

        Args:
            status: Optional ``processing_status`` filter.
            limit: Maximum number of documents; negative values are treated as 0.
            offset: Number of documents to skip; negative values are treated as 0.
        """
        # Django querysets reject negative slice bounds, so clamp both values
        # instead of letting a bad paging parameter surface as an ORM error.
        offset = max(offset, 0)
        limit = max(limit, 0)
        queryset = self.get_user_documents(status)
        return list(queryset[offset:offset + limit])

    def delete_document(self, document_id: str) -> bool:
        """Delete the user's document and its chunks in one transaction.

        Returns:
            ``True`` if the document existed and was deleted, ``False`` if the
            user owns no document with this id.
        """
        with transaction.atomic():
            # Reuse the ownership-scoped lookup rather than duplicating it.
            document = self.get_document(document_id)
            if document is None:
                return False

            # Chunks are removed explicitly before the document itself.
            DocumentChunk.objects.filter(document=document).delete()
            document.delete()

        return True

    def get_processing_stats(self) -> Dict[str, Any]:
        """Return aggregate processing statistics for the user's documents.

        Returns:
            Dict with ``total_documents``, ``completed_documents``,
            ``processing_success_rate`` (percentage, 0 when there are no
            documents), ``total_chunks``, ``total_tokens``,
            ``total_cost_usd`` and ``avg_processing_time_seconds`` (always
            0.0 here; not computed by this method).
        """
        stats = Document.objects.filter(user=self.user).aggregate(
            total_documents=models.Count('id'),
            completed_documents=models.Count(
                'id',
                filter=models.Q(processing_status=ProcessingStatus.COMPLETED),
            ),
            total_chunks=models.Sum('chunks_count'),
            total_tokens=models.Sum('total_tokens'),
            total_cost=models.Sum('total_cost_usd'),
        )

        total = stats['total_documents'] or 0
        completed = stats['completed_documents'] or 0
        # Guard the division: a user with no documents has a 0 success rate.
        success_rate = (completed / total * 100) if total > 0 else 0

        return {
            'total_documents': total,
            'completed_documents': completed,
            'processing_success_rate': success_rate,
            # Sum() yields None over an empty set, hence the "or 0" fallbacks.
            'total_chunks': stats['total_chunks'] or 0,
            'total_tokens': stats['total_tokens'] or 0,
            'total_cost_usd': float(stats['total_cost'] or 0),
            'avg_processing_time_seconds': 0.0  # Calculated separately if needed
        }

    def reprocess_document(self, document_id: str) -> bool:
        """Reset the document to ``PENDING`` and enqueue chunk reprocessing.

        Returns:
            ``True`` if the document was found and the task was sent,
            ``False`` if the user owns no document with this id.
        """
        document = self.get_document(document_id)
        if document is None:
            return False

        # Clear the previous outcome so the document reads as queued again.
        document.processing_status = ProcessingStatus.PENDING
        document.processing_error = ""
        document.save()

        # Imported lazily, as in the original (presumably to avoid a circular
        # import between services and tasks -- TODO confirm).
        from ..tasks import reprocess_document_chunks
        reprocess_document_chunks.send(str(document.id))

        return True
@@ -0,0 +1,43 @@
1
+ """
2
+ Embedding processing services.
3
+
4
+ This package provides high-performance embedding generation services
5
+ for documents and archives with batch processing and async support.
6
+ """
7
+
8
+ from .models import ChunkData, EmbeddingResult, BatchProcessingResult, ProcessingConfig, ChunkType
9
+ from .processors import DocumentChunkProcessor, ArchiveChunkProcessor, ExternalDataChunkProcessor
10
+ from .batch_processor import OptimizedEmbeddingProcessor
11
+ from .async_processor import AsyncOptimizedEmbeddingProcessor
12
+ from .batch_result import BatchResultBuilder
13
+ from .utils import (
14
+ process_document_chunks_optimized,
15
+ process_archive_chunks_optimized,
16
+ process_external_data_chunks_optimized,
17
+ process_chunks_context_aware,
18
+ )
19
+
20
+ __all__ = [
21
+ # Data models
22
+ "ChunkData",
23
+ "EmbeddingResult",
24
+ "BatchProcessingResult",
25
+ "ProcessingConfig",
26
+ "ChunkType",
27
+
28
+ # Processors
29
+ "DocumentChunkProcessor",
30
+ "ArchiveChunkProcessor",
31
+ "ExternalDataChunkProcessor",
32
+ "OptimizedEmbeddingProcessor",
33
+ "AsyncOptimizedEmbeddingProcessor",
34
+
35
+ # Utilities
36
+ "BatchResultBuilder",
37
+
38
+ # Convenience functions
39
+ "process_document_chunks_optimized",
40
+ "process_archive_chunks_optimized",
41
+ "process_external_data_chunks_optimized",
42
+ "process_chunks_context_aware",
43
+ ]
@@ -0,0 +1,244 @@
1
+ """
2
+ Async-compatible embedding processor for Django 5.2.
3
+
4
+ This module provides async/sync compatibility for embedding generation,
5
+ following Django 5.2 async best practices.
6
+ """
7
+
8
+ import asyncio
9
+ import time
10
+ import logging
11
+ from typing import List, Dict, Any, Optional, Union
12
+
13
+ from asgiref.sync import sync_to_async, async_to_sync
14
+ from django.db import transaction
15
+
16
+ from django_cfg.apps.knowbase.models import DocumentChunk, ArchiveItemChunk
17
+ from django_cfg.apps.knowbase.utils.chunk_settings import get_embedding_batch_size, get_embedding_model
18
+
19
+ from .models import ChunkData, BatchProcessingResult, EmbeddingResult
20
+ from .batch_processor import OptimizedEmbeddingProcessor
21
+
22
+ logger = logging.getLogger(__name__)
23
+
24
+
25
def is_async_context() -> bool:
    """Report whether an asyncio event loop is running in the current thread.

    Returns:
        ``True`` when ``asyncio.get_running_loop()`` succeeds, ``False`` when
        it raises ``RuntimeError`` (no running loop in this thread).
    """
    try:
        asyncio.get_running_loop()
    except RuntimeError:
        return False
    return True
32
+
33
+
34
class AsyncOptimizedEmbeddingProcessor(OptimizedEmbeddingProcessor):
    """
    Embedding processor usable from both sync and async callers.

    Adds coroutine counterparts of the batch-processing methods that persist
    results through Django's a-prefixed ORM methods (``aget`` / ``asave``),
    plus a synchronous entry point that picks a strategy depending on whether
    an event loop is running in the calling thread.
    """

    def __init__(self, batch_size: Optional[int] = None, embedding_model: Optional[str] = None):
        """Initialize the processor; arguments are forwarded to the base class."""
        super().__init__(batch_size, embedding_model)
        logger.info(f"🚀 AsyncOptimizedEmbeddingProcessor initialized: async_context={is_async_context()}")

    async def aprocess_chunks_batch(self, chunks: List[ChunkData]) -> BatchProcessingResult:
        """
        Process ``chunks`` in slices of ``self.batch_size`` and aggregate the outcome.

        A failure of one slice is recorded (all of its chunks counted as
        failed) and processing continues with the next slice.

        Args:
            chunks: Chunks to embed.

        Returns:
            ``BatchProcessingResult`` with counts, token/cost totals, elapsed
            wall-clock time and the collected error messages.
        """
        start_time = time.time()
        total_chunks = len(chunks)
        successful_chunks = 0
        failed_chunks = 0
        total_tokens = 0
        total_cost = 0.0
        errors = []

        logger.info(f"🔮 Starting async batch processing of {total_chunks} chunks")

        # Loop-invariant: ceil(total_chunks / batch_size).
        total_batches = (total_chunks + self.batch_size - 1) // self.batch_size

        for start in range(0, total_chunks, self.batch_size):
            batch = chunks[start:start + self.batch_size]
            batch_num = (start // self.batch_size) + 1

            logger.info(f"🔮 Processing async batch {batch_num}/{total_batches} ({len(batch)} chunks)")

            try:
                batch_result = await self._aprocess_single_batch(batch)

                successful_chunks += batch_result['successful']
                failed_chunks += batch_result['failed']
                total_tokens += batch_result['tokens']
                total_cost += batch_result['cost']
                errors.extend(batch_result['errors'])

                # Short pause between slices (skipped after the last one).
                if start + self.batch_size < total_chunks:
                    await asyncio.sleep(0.5)

            except Exception as e:
                error_msg = f"Async batch {batch_num} failed: {str(e)}"
                logger.error(f"❌ {error_msg}")
                errors.append(error_msg)
                failed_chunks += len(batch)

        processing_time = time.time() - start_time

        result = BatchProcessingResult(
            total_chunks=total_chunks,
            successful_chunks=successful_chunks,
            failed_chunks=failed_chunks,
            total_tokens=total_tokens,
            total_cost=total_cost,
            processing_time=processing_time,
            errors=errors
        )

        logger.info(
            f"🎉 Async batch processing completed: {successful_chunks}/{total_chunks} successful, "
            f"{total_tokens} tokens, ${total_cost:.4f} cost, {processing_time:.2f}s"
        )

        return result

    async def _aprocess_single_batch(self, batch: List[ChunkData]) -> Dict[str, Any]:
        """
        Embed one slice of chunks and persist each successful result.

        Returns:
            Dict with keys ``successful``, ``failed``, ``tokens``, ``cost`` and
            ``errors``. Every chunk of ``batch`` is accounted for as either
            successful or failed, including chunks that could not be prepared
            or for which no embedding result came back.
        """
        # Parallel lists: prepared_contents[k] is the text for prepared_chunks[k].
        prepared_contents = []
        prepared_chunks = []
        prep_errors = []

        for chunk in batch:
            processor = self.processors.get(chunk.parent_type)
            if not processor:
                logger.warning(f"⚠️ Unknown chunk type: {chunk.parent_type}")
                prep_errors.append(f"Unknown chunk type: {chunk.parent_type}")
                continue

            try:
                content = processor.prepare_content_for_embedding(chunk)
            except Exception as e:
                logger.error(f"❌ Failed to prepare content for chunk {chunk.id}: {e}")
                prep_errors.append(f"Failed to prepare content for chunk {chunk.id}: {e}")
                continue

            if content and content.strip():
                prepared_contents.append(content)
                prepared_chunks.append(chunk)
            else:
                logger.warning(f"⚠️ Empty content for chunk {chunk.id}")
                prep_errors.append(f"Empty content for chunk {chunk.id}")

        if not prepared_contents:
            return {
                'successful': 0,
                'failed': len(batch),
                'tokens': 0,
                'cost': 0.0,
                'errors': ['No valid content to process']
            }

        try:
            # The embedding call is synchronous (per the original comment the
            # OpenAI client is sync); run it in a worker thread so it does not
            # block the event loop while waiting on the network.
            embedding_results = await sync_to_async(
                self._generate_batch_embeddings, thread_sensitive=False
            )(prepared_contents)

            successful = 0
            # Chunks dropped during preparation count as failures too, so that
            # successful + failed always equals len(batch).
            failed = len(prep_errors)
            total_tokens = 0
            total_cost = 0.0
            errors = list(prep_errors)
            handled = 0

            # zip() stops at the shorter sequence, mirroring the original
            # behavior of ignoring results that have no matching chunk.
            for chunk, embedding_result in zip(prepared_chunks, embedding_results):
                handled += 1

                if not embedding_result.success:
                    errors.append(embedding_result.error or f"Failed to generate embedding for chunk {chunk.id}")
                    failed += 1
                    continue

                try:
                    await self._asave_embedding_result(chunk, embedding_result)
                except Exception as e:
                    error_msg = f"Failed to save async embedding for chunk {chunk.id}: {e}"
                    logger.error(f"❌ {error_msg}")
                    errors.append(error_msg)
                    failed += 1
                else:
                    successful += 1
                    total_tokens += embedding_result.tokens
                    total_cost += embedding_result.cost

            # Prepared chunks for which the API returned nothing at all.
            missing = len(prepared_chunks) - handled
            if missing > 0:
                failed += missing
                errors.append(f"No embedding result returned for {missing} chunk(s)")

            return {
                'successful': successful,
                'failed': failed,
                'tokens': total_tokens,
                'cost': total_cost,
                'errors': errors
            }

        except Exception as e:
            error_msg = f"Async batch embedding generation failed: {e}"
            logger.error(f"❌ {error_msg}")
            return {
                'successful': 0,
                'failed': len(batch),
                'tokens': 0,
                'cost': 0.0,
                'errors': [error_msg]
            }

    async def _asave_embedding_result(self, chunk: ChunkData, result: EmbeddingResult) -> None:
        """
        Persist an embedding on the chunk row identified by ``chunk.id``.

        For ``"archive"`` chunks the parent item's ``total_tokens`` and
        ``processing_cost`` are incremented as well.

        Raises:
            ValueError: If ``chunk.parent_type`` is neither ``"document"`` nor
                ``"archive"``.
            Exception: Any ORM error is logged and re-raised.
        """
        try:
            if chunk.parent_type == "document":
                chunk_obj = await DocumentChunk.objects.aget(id=chunk.id)
                chunk_obj.embedding = result.embedding
                chunk_obj.token_count = result.tokens
                chunk_obj.embedding_cost = result.cost
                await chunk_obj.asave(update_fields=['embedding', 'token_count', 'embedding_cost'])

                logger.debug(f"✅ Async document chunk {chunk.id} embedding saved: {result.tokens} tokens, ${result.cost:.4f}")

            elif chunk.parent_type == "archive":
                # select_related loads the parent item in the same query, so
                # accessing chunk_obj.item below does not trigger a lazy
                # (synchronous) fetch inside the coroutine.
                chunk_obj = await ArchiveItemChunk.objects.select_related('item').aget(id=chunk.id)
                chunk_obj.embedding = result.embedding
                chunk_obj.token_count = result.tokens
                chunk_obj.embedding_cost = result.cost
                await chunk_obj.asave(update_fields=['embedding', 'token_count', 'embedding_cost'])

                # NOTE(review): read-modify-write on the parent totals; if
                # several chunks of one item are saved concurrently an
                # increment can be lost. Consider an F()-based update.
                item = chunk_obj.item
                item.total_tokens += result.tokens
                item.processing_cost += result.cost
                await item.asave(update_fields=['total_tokens', 'processing_cost'])

                logger.debug(f"✅ Async archive chunk {chunk.id} embedding saved: {result.tokens} tokens, ${result.cost:.4f}")
            else:
                raise ValueError(f"Unknown chunk type: {chunk.parent_type}")

        except Exception as e:
            logger.error(f"❌ Failed to save async embedding for chunk {chunk.id}: {e}")
            raise

    def process_chunks_batch_context_aware(self, chunks: List[ChunkData]) -> BatchProcessingResult:
        """
        Synchronous entry point that works with or without a running event loop.

        Without a running loop the base class's synchronous
        ``process_chunks_batch`` is used. With a running loop in the calling
        thread, the async pipeline is executed on a fresh event loop in a
        short-lived worker thread and this call blocks until it finishes.

        Args:
            chunks: Chunks to embed.

        Returns:
            The ``BatchProcessingResult`` of whichever pipeline ran.
        """
        if not is_async_context():
            logger.info("🔮 Detected sync context - using sync processing")
            return super().process_chunks_batch(chunks)

        logger.info("🔮 Detected async context - using async processing")

        # async_to_sync() refuses to run in a thread that already has a
        # running event loop (it raises RuntimeError), so the coroutine is
        # driven by asyncio.run() in a separate thread instead. The coroutine
        # is created inside the worker so it is bound to the new loop.
        import concurrent.futures

        with concurrent.futures.ThreadPoolExecutor(max_workers=1) as pool:
            future = pool.submit(lambda: asyncio.run(self.aprocess_chunks_batch(chunks)))
            return future.result()
@@ -0,0 +1,250 @@
1
+ """
2
+ High-performance batch embedding processor.
3
+
4
+ This module provides the main batch processing engine for generating
5
+ embeddings with optimized API calls and database operations.
6
+ """
7
+
8
+ import time
9
+ import logging
10
+ from typing import List, Dict, Any, Optional
11
+
12
+ from django_cfg.apps.knowbase.utils.chunk_settings import get_embedding_batch_size, get_embedding_model
13
+ from django_cfg.apps.knowbase.config.settings import get_openai_api_key, get_cache_settings
14
+ from django_cfg.modules.django_llm.llm.client import LLMClient
15
+
16
+ from .models import ChunkData, EmbeddingResult, BatchProcessingResult, ProcessingConfig, ChunkType
17
+ from .processors import DocumentChunkProcessor, ArchiveChunkProcessor, ExternalDataChunkProcessor
18
+ from .batch_result import BatchResultBuilder
19
+
20
+ logger = logging.getLogger(__name__)
21
+
22
+
23
class OptimizedEmbeddingProcessor:
    """
    High-performance embedding processor with batch operations.

    Chunks are split into batches of ``batch_size``. For each batch the
    content is prepared by a per-chunk-type processor, embeddings are
    requested through ``LLMClient`` and each successful embedding is persisted
    through the same per-type processor.
    """

    # Conservative upper limit on the batch size, for stability.
    _MAX_BATCH_SIZE = 100

    def __init__(self, batch_size: Optional[int] = None, embedding_model: Optional[str] = None):
        """
        Initialize the processor.

        Args:
            batch_size: Number of chunks to group into one processing batch
                (uses Constance setting if None or 0). The effective value is
                clamped to the range 1..100.
            embedding_model: Embedding model to use (uses Constance setting if None)
        """
        # Use Constance settings if not provided. The lower bound of 1 keeps
        # range() in process_chunks_batch from receiving a zero or negative step.
        requested_batch_size = batch_size or get_embedding_batch_size()
        self.batch_size = max(1, min(requested_batch_size, self._MAX_BATCH_SIZE))
        self.embedding_model = embedding_model or get_embedding_model()

        # Initialize LLM client with OpenAI only for embeddings
        # OpenRouter doesn't support embedding models, so we use OpenAI directly
        # Get cache settings from configuration (directory is auto-created)
        cache_settings = get_cache_settings()
        self.llm_client = LLMClient(
            preferred_provider="openai",  # Force OpenAI for embeddings
            cache_dir=cache_settings.cache_dir,
            cache_ttl=cache_settings.cache_ttl,
            max_cache_size=cache_settings.max_cache_size,
        )

        # Processors for different chunk types
        self.processors = {
            ChunkType.DOCUMENT: DocumentChunkProcessor(),
            ChunkType.ARCHIVE: ArchiveChunkProcessor(),
            ChunkType.EXTERNAL_DATA: ExternalDataChunkProcessor(),
        }

        logger.info(f"🚀 OptimizedEmbeddingProcessor initialized: batch_size={self.batch_size}, model={self.embedding_model}")

    def process_chunks_batch(self, chunks: List[ChunkData]) -> BatchProcessingResult:
        """
        Process multiple chunks with optimized batch operations.

        A failure of one batch is recorded on the result and does not stop
        the remaining batches from being processed.

        Args:
            chunks: List of chunks to process

        Returns:
            BatchProcessingResult with processing statistics
        """
        # Monotonic clock: the measured duration is unaffected by wall-clock changes.
        start_time = time.monotonic()
        total_chunks = len(chunks)
        result_builder = BatchResultBuilder(total_chunks)

        logger.info(f"🔮 Starting batch processing of {total_chunks} chunks")

        batch_size = self.batch_size
        total_batches = (total_chunks + batch_size - 1) // batch_size

        # Process in batches
        for batch_num, offset in enumerate(range(0, total_chunks, batch_size), start=1):
            batch = chunks[offset:offset + batch_size]

            logger.info(f"🔮 Processing batch {batch_num}/{total_batches} ({len(batch)} chunks)")

            try:
                batch_results = self._process_single_batch(batch)
                result_builder.add_batch_results(batch_results)

                # Small delay between batches to respect rate limits
                if offset + batch_size < total_chunks:
                    time.sleep(0.5)

            except Exception as e:
                error_msg = f"Batch {batch_num} failed: {str(e)}"
                logger.error(f"❌ {error_msg}")
                result_builder.add_batch_error(error_msg, len(batch))

        processing_time = time.monotonic() - start_time
        result = result_builder.build(processing_time)

        # Log using Pydantic model's summary
        summary = result.model_dump_summary()
        logger.info(f"🎉 Batch processing completed: {summary}")
        logger.info(f"📊 Performance: {summary['chunks_per_second']} chunks/sec, {summary['avg_cost_per_chunk']} per chunk")

        return result

    @staticmethod
    def _failed_result(chunk_id: Any, error: str) -> EmbeddingResult:
        """Build a failed EmbeddingResult for the given chunk id."""
        return EmbeddingResult(chunk_id=chunk_id, success=False, error=error)

    def _process_single_batch(self, batch: List[ChunkData]) -> List[EmbeddingResult]:
        """
        Process a single batch of chunks and return list of EmbeddingResult.

        Every chunk in ``batch`` is represented in the returned list: chunks
        that cannot be embedded (unknown type, empty content, preparation
        error) are reported as failed results instead of being dropped.

        Args:
            batch: Chunks belonging to one batch.

        Returns:
            One EmbeddingResult per chunk; successful results have already
            been persisted through the chunk type's processor.
        """
        # Prepare content for all chunks; the two lists stay index-aligned.
        prepared_contents: List[str] = []
        prepared_chunks: List[ChunkData] = []
        skipped_results: List[EmbeddingResult] = []

        for chunk in batch:
            processor = self.processors.get(chunk.parent_type)
            if not processor:
                logger.warning(f"⚠️ Unknown chunk type: {chunk.parent_type}")
                skipped_results.append(
                    self._failed_result(chunk.id, f"Unknown chunk type: {chunk.parent_type}")
                )
                continue

            try:
                content = processor.prepare_content_for_embedding(chunk)
                if content and content.strip():
                    prepared_contents.append(content)
                    prepared_chunks.append(chunk)
                else:
                    logger.warning(f"⚠️ Empty content for chunk {chunk.id}")
                    skipped_results.append(
                        self._failed_result(chunk.id, f"Empty content for chunk {chunk.id}")
                    )
            except Exception as e:
                logger.error(f"❌ Failed to prepare content for chunk {chunk.id}: {e}")
                skipped_results.append(
                    self._failed_result(chunk.id, f"Failed to prepare content for chunk {chunk.id}: {e}")
                )

        if not prepared_contents:
            # Return failed results for all chunks
            return [
                self._failed_result(chunk.id, "No valid content to process")
                for chunk in batch
            ]

        # Generate embeddings in batch
        try:
            embedding_results = self._generate_batch_embeddings(prepared_contents)

            # Process and save results
            final_results: List[EmbeddingResult] = []

            for chunk, embedding_result in zip(prepared_chunks, embedding_results):
                processor = self.processors[chunk.parent_type]

                # Set the chunk_id in the result
                embedding_result.chunk_id = chunk.id

                if not embedding_result.success:
                    final_results.append(embedding_result)
                    continue

                try:
                    logger.debug(f"🔄 Attempting to save embedding for chunk {chunk.id} (type: {chunk.parent_type})")
                    processor.save_embedding_result(chunk.id, embedding_result)
                    logger.info(f"✅ Successfully saved embedding for chunk {chunk.id}")
                    final_results.append(embedding_result)
                except Exception as e:
                    error_msg = f"Failed to save embedding for chunk {chunk.id}: {e}"
                    logger.error(f"❌ {error_msg}")
                    final_results.append(self._failed_result(chunk.id, error_msg))

            # Defensive: report prepared chunks for which no result came back.
            for chunk in prepared_chunks[len(embedding_results):]:
                final_results.append(
                    self._failed_result(chunk.id, f"No embedding result returned for chunk {chunk.id}")
                )

            # Account for the chunks that never reached the embedding step.
            final_results.extend(skipped_results)
            return final_results

        except Exception as e:
            error_msg = f"Batch embedding generation failed: {e}"
            logger.error(f"❌ {error_msg}")
            # Return failed results for all chunks
            return [
                self._failed_result(chunk.id, error_msg)
                for chunk in batch
            ]

    def _generate_batch_embeddings(self, contents: List[str]) -> List[EmbeddingResult]:
        """
        Generate embeddings for multiple contents using LLMClient.

        ``LLMClient.generate_embedding`` is called once per content item. A
        failure for one item yields a failed result at that position and does
        not affect the others.

        Args:
            contents: Texts to embed.

        Returns:
            One EmbeddingResult per entry of ``contents``, in the same order.
            ``chunk_id`` is left empty and is expected to be set by the caller.
        """
        results = []

        try:
            # One generate_embedding call per content item (the client is
            # created with preferred_provider="openai" in __init__).
            for idx, content in enumerate(contents):
                try:
                    embedding_response = self.llm_client.generate_embedding(
                        text=content,
                        model=self.embedding_model
                    )

                    results.append(EmbeddingResult(
                        chunk_id="",  # Will be set by caller
                        embedding=embedding_response.embedding,
                        tokens=embedding_response.tokens,
                        cost=embedding_response.cost,
                        success=True
                    ))

                except Exception as e:
                    logger.error(f"❌ Failed to generate embedding for content {idx}: {e}")
                    results.append(EmbeddingResult(
                        chunk_id="",
                        embedding=[],
                        tokens=0,
                        cost=0.0,
                        success=False,
                        error=str(e)
                    ))

            successful_count = sum(1 for r in results if r.success)
            logger.info(f"🎯 Generated {successful_count}/{len(results)} embeddings successfully")

            # Log details of each result. Guarded so the f-strings are only
            # formatted when DEBUG output is actually enabled.
            if logger.isEnabledFor(logging.DEBUG):
                for i, result in enumerate(results):
                    if result.success:
                        logger.debug(f" ✅ Result {i}: {result.tokens} tokens, ${result.cost:.4f}, embedding_len={len(result.embedding)}")
                    else:
                        logger.debug(f" ❌ Result {i}: {result.error}")

            return results

        except Exception as e:
            logger.error(f"❌ Batch embedding generation failed: {e}")
            # Return failed results for all contents
            return [
                EmbeddingResult(
                    chunk_id="",
                    embedding=[],
                    tokens=0,
                    cost=0.0,
                    success=False,
                    error=str(e)
                )
                for _ in contents
            ]