django-cfg 1.1.82__py3-none-any.whl → 1.2.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (244) hide show
  1. django_cfg/__init__.py +20 -448
  2. django_cfg/apps/accounts/README.md +3 -3
  3. django_cfg/apps/accounts/admin/__init__.py +0 -2
  4. django_cfg/apps/accounts/admin/activity.py +2 -9
  5. django_cfg/apps/accounts/admin/filters.py +0 -42
  6. django_cfg/apps/accounts/admin/inlines.py +8 -8
  7. django_cfg/apps/accounts/admin/otp.py +5 -5
  8. django_cfg/apps/accounts/admin/registration_source.py +1 -8
  9. django_cfg/apps/accounts/admin/user.py +12 -20
  10. django_cfg/apps/accounts/managers/user_manager.py +2 -129
  11. django_cfg/apps/accounts/migrations/0006_remove_twilioresponse_otp_secret_and_more.py +46 -0
  12. django_cfg/apps/accounts/models.py +3 -123
  13. django_cfg/apps/accounts/serializers/otp.py +40 -44
  14. django_cfg/apps/accounts/serializers/profile.py +0 -2
  15. django_cfg/apps/accounts/services/otp_service.py +98 -186
  16. django_cfg/apps/accounts/signals.py +25 -15
  17. django_cfg/apps/accounts/utils/auth_email_service.py +84 -0
  18. django_cfg/apps/accounts/views/otp.py +35 -36
  19. django_cfg/apps/agents/README.md +129 -0
  20. django_cfg/apps/agents/__init__.py +68 -0
  21. django_cfg/apps/agents/admin/__init__.py +17 -0
  22. django_cfg/apps/agents/admin/execution_admin.py +460 -0
  23. django_cfg/apps/agents/admin/registry_admin.py +360 -0
  24. django_cfg/apps/agents/admin/toolsets_admin.py +482 -0
  25. django_cfg/apps/agents/apps.py +29 -0
  26. django_cfg/apps/agents/core/__init__.py +20 -0
  27. django_cfg/apps/agents/core/agent.py +281 -0
  28. django_cfg/apps/agents/core/dependencies.py +154 -0
  29. django_cfg/apps/agents/core/exceptions.py +66 -0
  30. django_cfg/apps/agents/core/models.py +106 -0
  31. django_cfg/apps/agents/core/orchestrator.py +391 -0
  32. django_cfg/apps/agents/examples/__init__.py +3 -0
  33. django_cfg/apps/agents/examples/simple_example.py +161 -0
  34. django_cfg/apps/agents/integration/__init__.py +14 -0
  35. django_cfg/apps/agents/integration/middleware.py +80 -0
  36. django_cfg/apps/agents/integration/registry.py +345 -0
  37. django_cfg/apps/agents/integration/signals.py +50 -0
  38. django_cfg/apps/agents/management/__init__.py +3 -0
  39. django_cfg/apps/agents/management/commands/__init__.py +3 -0
  40. django_cfg/apps/agents/management/commands/create_agent.py +365 -0
  41. django_cfg/apps/agents/management/commands/orchestrator_status.py +191 -0
  42. django_cfg/apps/agents/managers/__init__.py +23 -0
  43. django_cfg/apps/agents/managers/execution.py +236 -0
  44. django_cfg/apps/agents/managers/registry.py +254 -0
  45. django_cfg/apps/agents/managers/toolsets.py +496 -0
  46. django_cfg/apps/agents/migrations/0001_initial.py +286 -0
  47. django_cfg/apps/agents/migrations/__init__.py +5 -0
  48. django_cfg/apps/agents/models/__init__.py +15 -0
  49. django_cfg/apps/agents/models/execution.py +215 -0
  50. django_cfg/apps/agents/models/registry.py +220 -0
  51. django_cfg/apps/agents/models/toolsets.py +305 -0
  52. django_cfg/apps/agents/patterns/__init__.py +24 -0
  53. django_cfg/apps/agents/patterns/content_agents.py +234 -0
  54. django_cfg/apps/agents/toolsets/__init__.py +15 -0
  55. django_cfg/apps/agents/toolsets/cache_toolset.py +285 -0
  56. django_cfg/apps/agents/toolsets/django_toolset.py +220 -0
  57. django_cfg/apps/agents/toolsets/file_toolset.py +324 -0
  58. django_cfg/apps/agents/toolsets/orm_toolset.py +319 -0
  59. django_cfg/apps/agents/urls.py +46 -0
  60. django_cfg/apps/knowbase/README.md +150 -0
  61. django_cfg/apps/knowbase/__init__.py +27 -0
  62. django_cfg/apps/knowbase/admin/__init__.py +23 -0
  63. django_cfg/apps/knowbase/admin/archive_admin.py +857 -0
  64. django_cfg/apps/knowbase/admin/chat_admin.py +386 -0
  65. django_cfg/apps/knowbase/admin/document_admin.py +650 -0
  66. django_cfg/apps/knowbase/admin/external_data_admin.py +685 -0
  67. django_cfg/apps/knowbase/apps.py +81 -0
  68. django_cfg/apps/knowbase/config/README.md +176 -0
  69. django_cfg/apps/knowbase/config/__init__.py +51 -0
  70. django_cfg/apps/knowbase/config/constance_fields.py +186 -0
  71. django_cfg/apps/knowbase/config/constance_settings.py +200 -0
  72. django_cfg/apps/knowbase/config/settings.py +450 -0
  73. django_cfg/apps/knowbase/examples/__init__.py +3 -0
  74. django_cfg/apps/knowbase/examples/external_data_usage.py +191 -0
  75. django_cfg/apps/knowbase/management/__init__.py +0 -0
  76. django_cfg/apps/knowbase/management/commands/__init__.py +0 -0
  77. django_cfg/apps/knowbase/management/commands/knowbase_stats.py +158 -0
  78. django_cfg/apps/knowbase/management/commands/setup_knowbase.py +59 -0
  79. django_cfg/apps/knowbase/managers/__init__.py +22 -0
  80. django_cfg/apps/knowbase/managers/archive.py +426 -0
  81. django_cfg/apps/knowbase/managers/base.py +32 -0
  82. django_cfg/apps/knowbase/managers/chat.py +141 -0
  83. django_cfg/apps/knowbase/managers/document.py +203 -0
  84. django_cfg/apps/knowbase/managers/external_data.py +471 -0
  85. django_cfg/apps/knowbase/migrations/0001_initial.py +427 -0
  86. django_cfg/apps/knowbase/migrations/0002_archiveitem_archiveitemchunk_documentarchive_and_more.py +434 -0
  87. django_cfg/apps/knowbase/migrations/__init__.py +5 -0
  88. django_cfg/apps/knowbase/mixins/__init__.py +15 -0
  89. django_cfg/apps/knowbase/mixins/config.py +108 -0
  90. django_cfg/apps/knowbase/mixins/creator.py +81 -0
  91. django_cfg/apps/knowbase/mixins/examples/vehicle_model_example.py +199 -0
  92. django_cfg/apps/knowbase/mixins/external_data_mixin.py +813 -0
  93. django_cfg/apps/knowbase/mixins/service.py +362 -0
  94. django_cfg/apps/knowbase/models/__init__.py +41 -0
  95. django_cfg/apps/knowbase/models/archive.py +599 -0
  96. django_cfg/apps/knowbase/models/base.py +58 -0
  97. django_cfg/apps/knowbase/models/chat.py +157 -0
  98. django_cfg/apps/knowbase/models/document.py +267 -0
  99. django_cfg/apps/knowbase/models/external_data.py +376 -0
  100. django_cfg/apps/knowbase/serializers/__init__.py +68 -0
  101. django_cfg/apps/knowbase/serializers/archive_serializers.py +386 -0
  102. django_cfg/apps/knowbase/serializers/chat_serializers.py +137 -0
  103. django_cfg/apps/knowbase/serializers/document_serializers.py +94 -0
  104. django_cfg/apps/knowbase/serializers/external_data_serializers.py +256 -0
  105. django_cfg/apps/knowbase/serializers/public_serializers.py +74 -0
  106. django_cfg/apps/knowbase/services/__init__.py +40 -0
  107. django_cfg/apps/knowbase/services/archive/__init__.py +42 -0
  108. django_cfg/apps/knowbase/services/archive/archive_service.py +541 -0
  109. django_cfg/apps/knowbase/services/archive/chunking_service.py +791 -0
  110. django_cfg/apps/knowbase/services/archive/exceptions.py +52 -0
  111. django_cfg/apps/knowbase/services/archive/extraction_service.py +508 -0
  112. django_cfg/apps/knowbase/services/archive/vectorization_service.py +362 -0
  113. django_cfg/apps/knowbase/services/base.py +53 -0
  114. django_cfg/apps/knowbase/services/chat_service.py +239 -0
  115. django_cfg/apps/knowbase/services/document_service.py +144 -0
  116. django_cfg/apps/knowbase/services/embedding/__init__.py +43 -0
  117. django_cfg/apps/knowbase/services/embedding/async_processor.py +244 -0
  118. django_cfg/apps/knowbase/services/embedding/batch_processor.py +250 -0
  119. django_cfg/apps/knowbase/services/embedding/batch_result.py +61 -0
  120. django_cfg/apps/knowbase/services/embedding/models.py +229 -0
  121. django_cfg/apps/knowbase/services/embedding/processors.py +148 -0
  122. django_cfg/apps/knowbase/services/embedding/utils.py +176 -0
  123. django_cfg/apps/knowbase/services/prompt_builder.py +191 -0
  124. django_cfg/apps/knowbase/services/search_service.py +293 -0
  125. django_cfg/apps/knowbase/signals/__init__.py +21 -0
  126. django_cfg/apps/knowbase/signals/archive_signals.py +211 -0
  127. django_cfg/apps/knowbase/signals/chat_signals.py +37 -0
  128. django_cfg/apps/knowbase/signals/document_signals.py +143 -0
  129. django_cfg/apps/knowbase/signals/external_data_signals.py +157 -0
  130. django_cfg/apps/knowbase/tasks/__init__.py +39 -0
  131. django_cfg/apps/knowbase/tasks/archive_tasks.py +316 -0
  132. django_cfg/apps/knowbase/tasks/document_processing.py +341 -0
  133. django_cfg/apps/knowbase/tasks/external_data_tasks.py +341 -0
  134. django_cfg/apps/knowbase/tasks/maintenance.py +195 -0
  135. django_cfg/apps/knowbase/urls.py +43 -0
  136. django_cfg/apps/knowbase/utils/__init__.py +12 -0
  137. django_cfg/apps/knowbase/utils/chunk_settings.py +261 -0
  138. django_cfg/apps/knowbase/utils/text_processing.py +375 -0
  139. django_cfg/apps/knowbase/utils/validation.py +99 -0
  140. django_cfg/apps/knowbase/views/__init__.py +28 -0
  141. django_cfg/apps/knowbase/views/archive_views.py +469 -0
  142. django_cfg/apps/knowbase/views/base.py +49 -0
  143. django_cfg/apps/knowbase/views/chat_views.py +181 -0
  144. django_cfg/apps/knowbase/views/document_views.py +183 -0
  145. django_cfg/apps/knowbase/views/public_views.py +129 -0
  146. django_cfg/apps/leads/admin.py +70 -0
  147. django_cfg/apps/newsletter/admin.py +234 -0
  148. django_cfg/apps/newsletter/admin_filters.py +124 -0
  149. django_cfg/apps/support/admin.py +196 -0
  150. django_cfg/apps/support/admin_filters.py +71 -0
  151. django_cfg/apps/support/templates/support/chat/ticket_chat.html +1 -1
  152. django_cfg/apps/urls.py +5 -4
  153. django_cfg/cli/README.md +1 -1
  154. django_cfg/cli/commands/create_project.py +2 -2
  155. django_cfg/cli/commands/info.py +1 -1
  156. django_cfg/config.py +44 -0
  157. django_cfg/core/config.py +29 -82
  158. django_cfg/core/environment.py +1 -1
  159. django_cfg/core/generation.py +19 -107
  160. django_cfg/{integration.py → core/integration.py} +18 -16
  161. django_cfg/core/validation.py +1 -1
  162. django_cfg/management/__init__.py +1 -1
  163. django_cfg/management/commands/__init__.py +1 -1
  164. django_cfg/management/commands/auto_generate.py +482 -0
  165. django_cfg/management/commands/migrator.py +19 -101
  166. django_cfg/management/commands/test_email.py +1 -1
  167. django_cfg/middleware/README.md +0 -158
  168. django_cfg/middleware/__init__.py +0 -2
  169. django_cfg/middleware/user_activity.py +3 -3
  170. django_cfg/models/api.py +145 -0
  171. django_cfg/models/base.py +287 -0
  172. django_cfg/models/cache.py +4 -4
  173. django_cfg/models/constance.py +25 -88
  174. django_cfg/models/database.py +9 -9
  175. django_cfg/models/drf.py +3 -36
  176. django_cfg/models/email.py +163 -0
  177. django_cfg/models/environment.py +276 -0
  178. django_cfg/models/limits.py +1 -1
  179. django_cfg/models/logging.py +366 -0
  180. django_cfg/models/revolution.py +41 -2
  181. django_cfg/models/security.py +125 -0
  182. django_cfg/models/services.py +1 -1
  183. django_cfg/modules/__init__.py +2 -56
  184. django_cfg/modules/base.py +78 -52
  185. django_cfg/modules/django_currency/service.py +2 -2
  186. django_cfg/modules/django_email.py +2 -2
  187. django_cfg/modules/django_health.py +267 -0
  188. django_cfg/modules/django_llm/llm/client.py +91 -19
  189. django_cfg/modules/django_llm/translator/translator.py +2 -2
  190. django_cfg/modules/django_logger.py +2 -2
  191. django_cfg/modules/django_ngrok.py +2 -2
  192. django_cfg/modules/django_tasks.py +68 -3
  193. django_cfg/modules/django_telegram.py +3 -3
  194. django_cfg/modules/django_twilio/sendgrid_service.py +2 -2
  195. django_cfg/modules/django_twilio/service.py +2 -2
  196. django_cfg/modules/django_twilio/simple_service.py +2 -2
  197. django_cfg/modules/django_twilio/twilio_service.py +2 -2
  198. django_cfg/modules/django_unfold/__init__.py +69 -0
  199. django_cfg/modules/{unfold → django_unfold}/callbacks.py +23 -22
  200. django_cfg/modules/django_unfold/dashboard.py +278 -0
  201. django_cfg/modules/django_unfold/icons/README.md +145 -0
  202. django_cfg/modules/django_unfold/icons/__init__.py +12 -0
  203. django_cfg/modules/django_unfold/icons/constants.py +2851 -0
  204. django_cfg/modules/django_unfold/icons/generate_icons.py +486 -0
  205. django_cfg/modules/django_unfold/models/__init__.py +42 -0
  206. django_cfg/modules/django_unfold/models/config.py +601 -0
  207. django_cfg/modules/django_unfold/models/dashboard.py +206 -0
  208. django_cfg/modules/django_unfold/models/dropdown.py +40 -0
  209. django_cfg/modules/django_unfold/models/navigation.py +73 -0
  210. django_cfg/modules/django_unfold/models/tabs.py +25 -0
  211. django_cfg/modules/{unfold → django_unfold}/system_monitor.py +2 -2
  212. django_cfg/modules/django_unfold/utils.py +140 -0
  213. django_cfg/registry/__init__.py +23 -0
  214. django_cfg/registry/core.py +61 -0
  215. django_cfg/registry/exceptions.py +11 -0
  216. django_cfg/registry/modules.py +12 -0
  217. django_cfg/registry/services.py +26 -0
  218. django_cfg/registry/third_party.py +52 -0
  219. django_cfg/routing/__init__.py +19 -0
  220. django_cfg/routing/callbacks.py +198 -0
  221. django_cfg/routing/routers.py +48 -0
  222. django_cfg/templates/admin/layouts/dashboard_with_tabs.html +8 -9
  223. django_cfg/templatetags/__init__.py +0 -0
  224. django_cfg/templatetags/django_cfg.py +33 -0
  225. django_cfg/urls.py +33 -0
  226. django_cfg/utils/path_resolution.py +1 -1
  227. django_cfg/utils/smart_defaults.py +7 -61
  228. django_cfg/utils/toolkit.py +663 -0
  229. {django_cfg-1.1.82.dist-info → django_cfg-1.2.1.dist-info}/METADATA +83 -86
  230. django_cfg-1.2.1.dist-info/RECORD +441 -0
  231. django_cfg/archive/django_sample.zip +0 -0
  232. django_cfg/models/unfold.py +0 -271
  233. django_cfg/modules/unfold/__init__.py +0 -29
  234. django_cfg/modules/unfold/dashboard.py +0 -318
  235. django_cfg/pyproject.toml +0 -370
  236. django_cfg/routers.py +0 -83
  237. django_cfg-1.1.82.dist-info/RECORD +0 -278
  238. /django_cfg/{exceptions.py → core/exceptions.py} +0 -0
  239. /django_cfg/modules/{unfold → django_unfold}/models.py +0 -0
  240. /django_cfg/modules/{unfold → django_unfold}/tailwind.py +0 -0
  241. /django_cfg/{version_check.py → utils/version_check.py} +0 -0
  242. {django_cfg-1.1.82.dist-info → django_cfg-1.2.1.dist-info}/WHEEL +0 -0
  243. {django_cfg-1.1.82.dist-info → django_cfg-1.2.1.dist-info}/entry_points.txt +0 -0
  244. {django_cfg-1.1.82.dist-info → django_cfg-1.2.1.dist-info}/licenses/LICENSE +0 -0
@@ -0,0 +1,362 @@
1
+ """
2
+ Archive vectorization service.
3
+
4
+ Handles embedding generation for archive chunks with batch processing.
5
+ """
6
+
7
+ from typing import List, Dict, Any, Optional
8
+ from django.contrib.auth import get_user_model
9
+ from django.db import transaction, models
10
+ import logging
11
+ from ...models.archive import ArchiveItemChunk
12
+ from django_cfg.modules.django_llm.llm.models import EmbeddingResponse
13
+ from ..base import BaseService
14
+ from ..embedding import process_archive_chunks_optimized
15
+ from .exceptions import VectorizationError
16
+
17
+
18
# Resolve the project's active user model once, at import time.
User = get_user_model()
# Module-scoped logger, named after this module's import path.
logger = logging.getLogger(__name__)
# Sets this logger's level to DEBUG as a side effect of importing the module.
# NOTE(review): a library module pinning its own level may conflict with the
# host project's logging configuration — confirm this is intended.
logger.setLevel(logging.DEBUG)
21
+
22
class VectorizationResult:
    """Mutable tally describing the outcome of one vectorization run.

    All numeric fields start at zero and ``errors`` starts as a fresh empty
    list owned by the instance; callers update the attributes in place.
    """

    def __init__(self):
        # Chunks that received an embedding.
        self.vectorized_count: int = 0
        # Chunks for which embedding generation or persistence failed.
        self.failed_count: int = 0
        # Sum of tokens reported for the successfully embedded chunks.
        self.total_tokens: int = 0
        # Sum of the cost reported for the successfully embedded chunks.
        self.total_cost: float = 0.0
        # One human-readable message per failure.
        self.errors: List[str] = []
31
+
32
+
33
+ class ArchiveVectorizationService(BaseService):
34
+ """Service for vectorizing archive chunks."""
35
+
36
def __init__(self, user: User):
    """Bind the service to ``user`` and set the default batch size.

    The base-class initializer receives ``user`` unchanged; this subclass
    only adds the ``batch_size`` attribute.
    """
    super().__init__(user)
    # Number of chunks intended to be handled per batch.
    self.batch_size = 10
39
+
40
def vectorize_chunks_batch(self, chunks: List[ArchiveItemChunk]) -> Dict[str, Any]:
    """Embed ``chunks`` through the optimized batch processor.

    Args:
        chunks: Archive chunks to vectorize. An empty list short-circuits.

    Returns:
        A summary dict with the keys ``vectorized_count``, ``failed_count``,
        ``total_tokens``, ``total_cost``, ``success_rate`` and ``errors``.

    Raises:
        VectorizationError: Wraps any exception raised while running the
            batch processor or while building the summary.
    """
    if chunks:
        logger.info(f"🔮 Starting optimized vectorization of {len(chunks)} chunks")

        try:
            outcome = process_archive_chunks_optimized(chunks)

            logger.info(
                f"🔮 Optimized vectorization completed: {outcome.successful_chunks}/{outcome.total_chunks} chunks, "
                f"{outcome.failed_chunks} failed, {outcome.total_tokens} tokens, ${outcome.total_cost:.4f} cost, "
                f"{outcome.processing_time:.2f}s"
            )

            summary = {
                'vectorized_count': outcome.successful_chunks,
                'failed_count': outcome.failed_chunks,
                'total_tokens': outcome.total_tokens,
                'total_cost': outcome.total_cost,
            }
            # Guard the ratio against a processor that reports zero chunks.
            if outcome.total_chunks > 0:
                summary['success_rate'] = outcome.successful_chunks / outcome.total_chunks
            else:
                summary['success_rate'] = 0.0
            summary['errors'] = outcome.errors
            return summary

        except Exception as exc:
            logger.error(f"❌ Optimized vectorization failed: {exc}")
            failure_details = {
                "total_chunks": len(chunks),
                "error": str(exc)
            }
            raise VectorizationError(
                message=f"Optimized vectorization failed: {str(exc)}",
                code="OPTIMIZED_VECTORIZATION_FAILED",
                details=failure_details
            ) from exc

    # Nothing to do: report an all-zero summary instead of calling the processor.
    logger.warning("🔮 No chunks provided for vectorization")
    return {
        'vectorized_count': 0,
        'failed_count': 0,
        'total_tokens': 0,
        'total_cost': 0.0,
        'success_rate': 0.0,
        'errors': []
    }
85
+
86
def _vectorize_chunk_batch(self, chunks: List[ArchiveItemChunk]) -> VectorizationResult:
    """Embed each chunk in ``chunks`` one at a time and tally the outcome.

    Chunks that already hold a non-empty embedding are skipped without being
    counted. A failure on one chunk is recorded in the returned tally and
    never aborts the remaining chunks.

    Returns:
        A ``VectorizationResult`` with success/failure counts, accumulated
        tokens and cost, and one error message per failed chunk.
    """
    tally = VectorizationResult()

    for piece in chunks:
        try:
            if not (piece.embedding is None or len(piece.embedding) <= 0):
                logger.debug(f"🔮 Chunk {piece.id} already vectorized, skipping")
                continue

            logger.debug(f"🔮 Generating embedding for chunk {piece.id} ({piece.item.item_name})")

            generated = self._generate_chunk_embedding(piece)

            if not generated:
                tally.failed_count += 1
                failure = f"Failed to generate embedding for chunk {piece.id}"
                tally.errors.append(failure)
                logger.error(f"❌ {failure}")
                continue

            # Persist the chunk and its parent's running totals together.
            with transaction.atomic():
                piece.embedding = generated.embedding
                piece.token_count = generated.tokens
                piece.embedding_cost = generated.cost
                piece.save()

                parent = piece.item
                parent.total_tokens += generated.tokens
                parent.processing_cost += generated.cost
                parent.save()

            tally.vectorized_count += 1
            tally.total_tokens += generated.tokens
            logger.debug(f"✅ Chunk {piece.id} vectorized successfully: {generated.tokens} tokens, ${generated.cost:.4f}")
            tally.total_cost += generated.cost

        except Exception as exc:
            # Record the failure and move on to the next chunk.
            tally.failed_count += 1
            failure = f"Error processing chunk {piece.id}: {str(exc)}"
            tally.errors.append(failure)
            logger.error(f"❌ {failure}")

    return tally
135
+
136
+ def _generate_chunk_embedding(self, chunk: ArchiveItemChunk) -> Optional[EmbeddingResponse]:
137
+ """Generate embedding for a single chunk."""
138
+
139
+ if not chunk.content or not chunk.content.strip():
140
+ return None
141
+
142
+ try:
143
+ # Prepare content for embedding
144
+ content_for_embedding = self._prepare_content_for_embedding(chunk)
145
+
146
+ logger.debug(f"🔮 Prepared content for embedding: {len(content_for_embedding)} chars")
147
+
148
+ # Generate embedding using LLM client with specified model
149
+ from django_cfg.apps.knowbase.utils.chunk_settings import get_embedding_model
150
+ embedding_model = get_embedding_model()
151
+ embedding_result = self.llm_client.generate_embedding(
152
+ text=content_for_embedding,
153
+ model=embedding_model
154
+ )
155
+
156
+ if embedding_result:
157
+ logger.debug(f"🔮 Embedding generated successfully for chunk {chunk.id}")
158
+ logger.debug(f"🔮 Embedding result structure: {list(embedding_result.keys()) if isinstance(embedding_result, dict) else type(embedding_result)}")
159
+ else:
160
+ logger.warning(f"🔮 Embedding generation returned None for chunk {chunk.id}")
161
+
162
+ return embedding_result
163
+
164
+ except Exception as e:
165
+ # Log error but don't raise - we want to continue with other chunks
166
+ logger.error(f"🔮 Error generating embedding for chunk {chunk.id}: {str(e)}", exc_info=True)
167
+ return None
168
+
169
+ def _prepare_content_for_embedding(self, chunk: ArchiveItemChunk) -> str:
170
+ """Prepare chunk content for embedding generation."""
171
+
172
+ content = chunk.content
173
+ context = chunk.context_metadata
174
+
175
+ # Add context information to improve embedding quality
176
+ context_prefix = self._build_context_prefix(context)
177
+
178
+ # Combine context and content
179
+ if context_prefix:
180
+ enhanced_content = f"{context_prefix}\n\n{content}"
181
+ else:
182
+ enhanced_content = content
183
+
184
+ # Ensure content is not too long for embedding model
185
+ max_length = 8000 # Conservative limit for most embedding models
186
+ if len(enhanced_content) > max_length:
187
+ # Truncate but keep context prefix
188
+ if context_prefix:
189
+ available_length = max_length - len(context_prefix) - 4 # Account for separators
190
+ truncated_content = content[:available_length] + "..."
191
+ enhanced_content = f"{context_prefix}\n\n{truncated_content}"
192
+ else:
193
+ enhanced_content = content[:max_length] + "..."
194
+
195
+ return enhanced_content
196
+
197
+ def _build_context_prefix(self, context: Dict[str, Any]) -> str:
198
+ """Build context prefix to enhance embedding quality."""
199
+
200
+ prefix_parts = []
201
+
202
+ # Archive context
203
+ archive_info = context.get('archive_info', {})
204
+ if archive_info.get('title'):
205
+ prefix_parts.append(f"Archive: {archive_info['title']}")
206
+
207
+ # Item context
208
+ item_info = context.get('item_info', {})
209
+ if item_info.get('relative_path'):
210
+ prefix_parts.append(f"File: {item_info['relative_path']}")
211
+
212
+ if item_info.get('content_type'):
213
+ prefix_parts.append(f"Type: {item_info['content_type']}")
214
+
215
+ if item_info.get('language'):
216
+ prefix_parts.append(f"Language: {item_info['language']}")
217
+
218
+ # Structure context
219
+ structure_info = context.get('structure_info', {})
220
+ if structure_info.get('element_name'):
221
+ prefix_parts.append(f"Element: {structure_info['element_name']}")
222
+
223
+ if structure_info.get('section_title'):
224
+ prefix_parts.append(f"Section: {structure_info['section_title']}")
225
+
226
+ # Semantic context
227
+ semantic_info = context.get('semantic_info', {})
228
+ if semantic_info.get('content_purpose'):
229
+ prefix_parts.append(f"Purpose: {semantic_info['content_purpose']}")
230
+
231
+ return " | ".join(prefix_parts) if prefix_parts else ""
232
+
233
def vectorize_single_chunk(self, chunk_id: str) -> Dict[str, Any]:
    """Embed one of the current user's chunks, looked up by ID.

    Args:
        chunk_id: Primary key of the chunk; it must belong to ``self.user``.

    Returns:
        A dict with ``status`` (``'already_vectorized'`` or ``'vectorized'``),
        ``chunk_id``, ``token_count`` and ``cost``.

    Raises:
        VectorizationError: With code ``CHUNK_NOT_FOUND`` when no matching
            chunk exists for this user, or ``EMBEDDING_GENERATION_FAILED``
            when no embedding could be produced.
    """
    try:
        chunk = ArchiveItemChunk.objects.get(id=chunk_id, user=self.user)
    except ArchiveItemChunk.DoesNotExist as exc:
        # Chain explicitly so the lookup failure is reported as the cause,
        # matching how vectorize_chunks_batch re-raises.
        raise VectorizationError(
            message=f"Chunk not found: {chunk_id}",
            code="CHUNK_NOT_FOUND",
            details={"chunk_id": chunk_id}
        ) from exc

    # A chunk that already has a non-empty embedding is reported, not redone.
    if chunk.embedding is not None and len(chunk.embedding) > 0:
        return {
            'status': 'already_vectorized',
            'chunk_id': chunk_id,
            'token_count': chunk.token_count,
            'cost': chunk.embedding_cost
        }

    embedding_result = self._generate_chunk_embedding(chunk)

    if not embedding_result:
        raise VectorizationError(
            message=f"Failed to generate embedding for chunk {chunk_id}",
            code="EMBEDDING_GENERATION_FAILED",
            details={"chunk_id": chunk_id}
        )

    # Persist the chunk and its parent's running totals together.
    with transaction.atomic():
        chunk.embedding = embedding_result.embedding
        chunk.token_count = embedding_result.tokens
        chunk.embedding_cost = embedding_result.cost
        chunk.save()

        item = chunk.item
        item.total_tokens += embedding_result.tokens
        item.processing_cost += embedding_result.cost
        item.save()

    return {
        'status': 'vectorized',
        'chunk_id': chunk_id,
        'token_count': embedding_result.tokens,
        'cost': embedding_result.cost
    }
283
+
284
def get_vectorization_statistics(self, archive_id: Optional[str] = None) -> Dict[str, Any]:
    """Summarize embedding progress and usage for the current user's chunks.

    Args:
        archive_id: When truthy, restrict the statistics to that archive.

    Returns:
        A dict with chunk counts (``total_chunks``, ``vectorized_chunks``,
        ``pending_chunks``), ``vectorization_rate`` and the summed/averaged
        ``token_count`` and ``embedding_cost`` values; aggregates that come
        back empty are reported as zero.
    """
    user_chunks = ArchiveItemChunk.objects.filter(user=self.user)
    if archive_id:
        user_chunks = user_chunks.filter(archive_id=archive_id)

    total = user_chunks.count()
    embedded = user_chunks.filter(embedding__isnull=False).count()

    totals = user_chunks.aggregate(
        total_tokens=models.Sum('token_count'),
        total_cost=models.Sum('embedding_cost'),
        avg_tokens_per_chunk=models.Avg('token_count'),
        avg_cost_per_chunk=models.Avg('embedding_cost')
    )

    report = {
        'total_chunks': total,
        'vectorized_chunks': embedded,
        'pending_chunks': total - embedded,
    }
    # Avoid dividing by zero when the user has no chunks at all.
    if total > 0:
        report['vectorization_rate'] = embedded / total
    else:
        report['vectorization_rate'] = 0.0

    # Falsy aggregate results are replaced by a typed zero.
    fallbacks = (
        ('total_tokens', 0),
        ('total_cost', 0.0),
        ('avg_tokens_per_chunk', 0.0),
        ('avg_cost_per_chunk', 0.0),
    )
    for key, zero in fallbacks:
        report[key] = totals[key] or zero

    return report
314
+
315
def revectorize_chunks(
    self,
    chunk_ids: List[str],
    force: bool = False
) -> Dict[str, Any]:
    """Run batch vectorization over selected chunks of the current user.

    Args:
        chunk_ids: Primary keys of the chunks to process.
        force: When false, only chunks whose embedding is NULL are selected;
            when true, every listed chunk is handed to the batch method.

    Returns:
        The summary dict produced by ``vectorize_chunks_batch``.
    """
    criteria = {'id__in': chunk_ids, 'user': self.user}
    if not force:
        # Without force, leave chunks that already have an embedding alone.
        criteria['embedding__isnull'] = True

    selected = ArchiveItemChunk.objects.filter(**criteria)
    return self.vectorize_chunks_batch(list(selected))
332
+
333
def cleanup_failed_vectorizations(self) -> Dict[str, Any]:
    """Retry vectorization for the user's chunks that still lack an embedding.

    A chunk qualifies when its embedding is NULL and it was created more
    than one hour ago.

    Returns:
        A dict with ``found_failed_chunks`` (number of qualifying chunks)
        and ``retry_result`` (the ``vectorize_chunks_batch`` summary, or
        ``None`` when nothing qualified).
    """
    from datetime import timedelta
    from django.utils import timezone

    cutoff_time = timezone.now() - timedelta(hours=1)

    # Evaluate the queryset once: the reported count is then guaranteed to
    # describe exactly the chunks that are retried, and the separate COUNT
    # query is avoided.
    failed_chunks = list(
        ArchiveItemChunk.objects.filter(
            user=self.user,
            embedding__isnull=True,
            created_at__lt=cutoff_time
        )
    )

    if not failed_chunks:
        return {
            'found_failed_chunks': 0,
            'retry_result': None
        }

    result = self.vectorize_chunks_batch(failed_chunks)

    return {
        'found_failed_chunks': len(failed_chunks),
        'retry_result': result
    }
@@ -0,0 +1,53 @@
1
+ """
2
+ Base service classes and protocols.
3
+ """
4
+
5
+ from typing import Protocol, Optional, List, Dict, Any
6
+ from django.contrib.auth import get_user_model
7
+ from django_cfg.modules.django_llm.llm.client import LLMClient
8
+ from django.conf import settings
9
+ from abc import ABC
10
+ from ..config.settings import get_openai_api_key, get_openrouter_api_key, get_cache_settings
11
+
12
+ User = get_user_model()
13
+
14
+
15
class LLMServiceProtocol(Protocol):
    """Structural interface for an LLM backend used via dependency injection.

    NOTE(review): ArchiveVectorizationService calls
    ``llm_client.generate_embedding(text=..., model=...)`` and reads
    ``.embedding``/``.tokens``/``.cost`` from the result, which does not match
    the ``generate_embedding`` signature declared here — confirm whether this
    protocol is still current.
    """

    def generate_embedding(self, text: str) -> List[float]:
        """Produce an embedding for ``text``."""
        ...

    def chat_completion(self, messages: List[Dict[str, str]]) -> Dict[str, Any]:
        """Run a chat completion over ``messages``."""
        ...

    # ``model`` defaults to None, so it is annotated as Optional explicitly
    # (implicit Optional is rejected by current type checkers).
    def count_tokens(self, text: str, model: Optional[str] = None) -> int:
        """Count the tokens in ``text``, optionally for a specific ``model``."""
        ...

    def estimate_cost(self, input_tokens: int, output_tokens: int, model: Optional[str] = None) -> float:
        """Estimate the cost of a call from its input/output token counts."""
        ...
22
+
23
+
24
class CacheServiceProtocol(Protocol):
    """Structural interface for a string key/value cache backend."""

    def get(self, key: str) -> Optional[str]:
        """Look up ``key``; the signature allows ``None`` as the result."""
        ...

    def set(self, key: str, value: str, ttl: int = 3600) -> None:
        """Store ``value`` under ``key``; ``ttl`` defaults to 3600."""
        ...

    def delete(self, key: str) -> None:
        """Remove ``key`` from the cache."""
        ...
30
+
31
+
32
class BaseService(ABC):
    """Common foundation for services: a bound user plus an LLM client."""

    def __init__(self, user: User):
        """Remember ``user`` and build an ``LLMClient`` from the cache settings."""
        self.user = user
        # The client's cache directory, TTL and size limit all come from the
        # object returned by get_cache_settings().
        cache_cfg = get_cache_settings()
        client_options = {
            'cache_dir': cache_cfg.cache_dir,
            'cache_ttl': cache_cfg.cache_ttl,
            'max_cache_size': cache_cfg.max_cache_size,
        }
        self.llm_client = LLMClient(**client_options)

    def _ensure_user_access(self, obj) -> None:
        """Raise ``PermissionError`` if ``obj`` has a ``user`` other than ours.

        Objects without a ``user`` attribute are always accepted.
        """
        if not hasattr(obj, 'user'):
            return
        if obj.user != self.user:
            raise PermissionError(f"User {self.user.id} cannot access this resource")

    def _generate_content_hash(self, content: str) -> str:
        """Return the hex SHA-256 digest of ``content`` (default encoding)."""
        import hashlib
        hasher = hashlib.sha256()
        hasher.update(content.encode())
        return hasher.hexdigest()
@@ -0,0 +1,239 @@
1
+ """
2
+ RAG-powered chat service.
3
+ """
4
+
5
+ from typing import List, Dict, Any, Optional
6
+ from django.utils import timezone
7
+ from django_cfg.modules.django_llm.llm.models import ChatCompletionResponse
8
+ from ..models import ChatSession, ChatMessage, DocumentChunk
9
+ from ..utils.validation import clean_search_results, safe_float
10
+ from .base import BaseService
11
+ from .search_service import SearchService
12
+ from .prompt_builder import SystemPromptBuilder
13
+
14
+
15
class ChatService(BaseService):
    """Chat service that answers queries using retrieved context (RAG).

    Every lookup and write is scoped to the user the service was created
    for.
    """

    def __init__(self, user):
        """Bind the service to *user* and create a search service for them."""
        super().__init__(user)
        self.search_service = SearchService(user)

    def create_session(
        self,
        title: str = "",
        model_name: str = "openai/gpt-4o-mini",
        temperature: float = 0.7,
        max_context_chunks: int = 5
    ) -> ChatSession:
        """Create and return a new active chat session for the bound user.

        An empty *title* is stored as the placeholder "New Chat Session".
        """
        session_title = title if title else "New Chat Session"
        return ChatSession.objects.create(
            user=self.user,
            title=session_title,
            model_name=model_name,
            temperature=temperature,
            max_context_chunks=max_context_chunks,
            is_active=True,
        )

    def process_query(
        self,
        session_id: str,
        query: str,
        max_tokens: int = 1000,
        include_sources: bool = True,
        enable_diagrams: bool = False
    ) -> Dict[str, Any]:
        """Answer *query* inside a session using retrieved context.

        Steps, in order: load the user's active session, run the universal
        semantic search, build the prompt, call the LLM, persist the user
        and assistant messages, update the session totals, and title the
        session from the query if it is still untitled.

        Returns a dict with ``message_id``, ``content``, ``tokens_used``,
        ``cost_usd``, ``processing_time_ms`` and ``model_used``; a
        ``sources`` list is added when *include_sources* is true.

        Raises ``ChatSession.DoesNotExist`` when no active session with
        *session_id* belongs to the bound user.
        """
        # Only the caller's own, still-active session is eligible.
        session = ChatSession.objects.get(
            id=session_id,
            user=self.user,
            is_active=True,
        )

        # Search documents, archives and external data together.
        # threshold=None defers to the type-specific thresholds from
        # configuration; clean_search_results then drops entries with
        # invalid similarity scores.
        hits = clean_search_results(
            self.search_service.semantic_search_universal(
                query=query,
                limit=session.max_context_chunks,
                threshold=None,
                include_documents=True,
                include_archives=True,
                include_external=True,
            )
        )

        llm_messages = self._build_context_messages(
            session=session,
            query=query,
            search_results=hits,
            enable_diagrams=enable_diagrams,
        )

        # The client returns a ChatCompletionResponse (Pydantic model).
        response: ChatCompletionResponse = self.llm_client.chat_completion(
            messages=llm_messages,
            model=session.model_name,
            temperature=session.temperature,
            max_tokens=max_tokens,
        )

        # Record which chunks backed this query as "<prefix>:<chunk id>".
        # Result types outside this table are ignored.
        prefix_by_type = {
            'document': 'doc',
            'archive': 'archive',
            'external_data': 'external',
        }
        context_chunk_ids = []
        for hit in hits:
            prefix = prefix_by_type.get(hit['type'])
            if prefix is not None:
                context_chunk_ids.append(f"{prefix}:{hit['chunk'].id}")

        # Persist the user's side of the exchange first.
        ChatMessage.objects.create(
            session=session,
            user=self.user,
            role=ChatMessage.MessageRole.USER,
            content=query,
            context_chunks=context_chunk_ids,
        )

        # Then persist the assistant's reply with its usage figures.
        elapsed_ms = int(response.processing_time * 1000)
        assistant_message = ChatMessage.objects.create(
            session=session,
            user=self.user,
            role=ChatMessage.MessageRole.ASSISTANT,
            content=response.content,
            tokens_used=response.tokens_used,
            cost_usd=response.cost_usd,
            processing_time_ms=elapsed_ms,
            model_name=session.model_name,
            finish_reason=response.finish_reason,
        )

        # Accumulate session totals. Per the original note, messages_count
        # is maintained by signals rather than here.
        session.total_tokens_used += response.tokens_used
        cost_so_far = safe_float(session.total_cost_usd, 0.0)
        cost_of_reply = safe_float(response.cost_usd, 0.0)
        session.total_cost_usd = cost_so_far + cost_of_reply
        session.save()

        # Title an untitled session from the query, cut to 50 characters.
        untitled = (not session.title) or session.title == "New Chat Session"
        if untitled:
            if len(query) > 50:
                session.title = query[:50] + "..."
            else:
                session.title = query
            session.save()

        payload = {
            'message_id': str(assistant_message.id),
            'content': response.content,
            'tokens_used': response.tokens_used,
            'cost_usd': safe_float(response.cost_usd, 0.0),
            'processing_time_ms': elapsed_ms,
            'model_used': session.model_name,
        }

        if include_sources:
            # Similarity values were already validated by
            # clean_search_results above.
            sources = []
            for hit in hits:
                entry = {
                    'type': hit['type'],
                    'source_title': hit['source_title'],
                }
                excerpt = hit['content']
                if len(excerpt) > 200:
                    excerpt = excerpt[:200] + "..."
                entry['chunk_content'] = excerpt
                entry['similarity'] = hit['similarity']
                entry['metadata'] = hit['metadata']
                sources.append(entry)
            payload['sources'] = sources

        return payload

    def _build_context_messages(
        self,
        session: ChatSession,
        query: str,
        search_results: List[Dict[str, Any]],
        enable_diagrams: bool = False
    ) -> List[Dict[str, str]]:
        """Assemble the message list sent to the LLM.

        The list holds a system prompt, then up to five of the session's
        latest stored messages in chronological order, then *query* as the
        final user message.
        """
        # Pick the prompt builder; an empty result list is passed as None.
        if enable_diagrams:
            build_prompt = SystemPromptBuilder.build_diagram_enhanced_prompt
        else:
            build_prompt = SystemPromptBuilder.build_conversation_prompt
        system_prompt = build_prompt(search_results=search_results or None)

        # Fetch the five newest messages, then flip them oldest-first.
        newest_first = list(
            ChatMessage.objects.filter(session=session).order_by('-created_at')[:5]
        )
        history = [
            {"role": past.role, "content": past.content}
            for past in newest_first[::-1]
        ]

        return [
            {"role": "system", "content": system_prompt},
            *history,
            {"role": "user", "content": query},
        ]

    def get_session_history(
        self,
        session_id: str,
        limit: int = 50
    ) -> List[ChatMessage]:
        """Return up to *limit* messages of a session, oldest first.

        Because the slice is taken in ascending ``created_at`` order, a
        session with more than *limit* messages yields its earliest ones.

        Raises ``ChatSession.DoesNotExist`` when the session does not
        belong to the bound user.
        """
        # Fetching with the user filter doubles as the access check.
        owned_session = ChatSession.objects.get(id=session_id, user=self.user)
        ordered = ChatMessage.objects.filter(session=owned_session).order_by('created_at')
        return list(ordered[:limit])

    def list_sessions(self, active_only: bool = True) -> List[ChatSession]:
        """Return the bound user's sessions, newest first.

        Inactive sessions are included only when *active_only* is false.
        """
        filters = {'user': self.user}
        if active_only:
            filters['is_active'] = True
        return list(ChatSession.objects.filter(**filters).order_by('-created_at'))

    def delete_session(self, session_id: str) -> bool:
        """Delete one of the bound user's sessions.

        Returns True when the session was found and deleted, False when no
        such session exists for this user.
        """
        try:
            ChatSession.objects.get(id=session_id, user=self.user).delete()
        except ChatSession.DoesNotExist:
            return False
        return True