django-cfg 1.1.82__py3-none-any.whl → 1.2.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (244) hide show
  1. django_cfg/__init__.py +20 -448
  2. django_cfg/apps/accounts/README.md +3 -3
  3. django_cfg/apps/accounts/admin/__init__.py +0 -2
  4. django_cfg/apps/accounts/admin/activity.py +2 -9
  5. django_cfg/apps/accounts/admin/filters.py +0 -42
  6. django_cfg/apps/accounts/admin/inlines.py +8 -8
  7. django_cfg/apps/accounts/admin/otp.py +5 -5
  8. django_cfg/apps/accounts/admin/registration_source.py +1 -8
  9. django_cfg/apps/accounts/admin/user.py +12 -20
  10. django_cfg/apps/accounts/managers/user_manager.py +2 -129
  11. django_cfg/apps/accounts/migrations/0006_remove_twilioresponse_otp_secret_and_more.py +46 -0
  12. django_cfg/apps/accounts/models.py +3 -123
  13. django_cfg/apps/accounts/serializers/otp.py +40 -44
  14. django_cfg/apps/accounts/serializers/profile.py +0 -2
  15. django_cfg/apps/accounts/services/otp_service.py +98 -186
  16. django_cfg/apps/accounts/signals.py +25 -15
  17. django_cfg/apps/accounts/utils/auth_email_service.py +84 -0
  18. django_cfg/apps/accounts/views/otp.py +35 -36
  19. django_cfg/apps/agents/README.md +129 -0
  20. django_cfg/apps/agents/__init__.py +68 -0
  21. django_cfg/apps/agents/admin/__init__.py +17 -0
  22. django_cfg/apps/agents/admin/execution_admin.py +460 -0
  23. django_cfg/apps/agents/admin/registry_admin.py +360 -0
  24. django_cfg/apps/agents/admin/toolsets_admin.py +482 -0
  25. django_cfg/apps/agents/apps.py +29 -0
  26. django_cfg/apps/agents/core/__init__.py +20 -0
  27. django_cfg/apps/agents/core/agent.py +281 -0
  28. django_cfg/apps/agents/core/dependencies.py +154 -0
  29. django_cfg/apps/agents/core/exceptions.py +66 -0
  30. django_cfg/apps/agents/core/models.py +106 -0
  31. django_cfg/apps/agents/core/orchestrator.py +391 -0
  32. django_cfg/apps/agents/examples/__init__.py +3 -0
  33. django_cfg/apps/agents/examples/simple_example.py +161 -0
  34. django_cfg/apps/agents/integration/__init__.py +14 -0
  35. django_cfg/apps/agents/integration/middleware.py +80 -0
  36. django_cfg/apps/agents/integration/registry.py +345 -0
  37. django_cfg/apps/agents/integration/signals.py +50 -0
  38. django_cfg/apps/agents/management/__init__.py +3 -0
  39. django_cfg/apps/agents/management/commands/__init__.py +3 -0
  40. django_cfg/apps/agents/management/commands/create_agent.py +365 -0
  41. django_cfg/apps/agents/management/commands/orchestrator_status.py +191 -0
  42. django_cfg/apps/agents/managers/__init__.py +23 -0
  43. django_cfg/apps/agents/managers/execution.py +236 -0
  44. django_cfg/apps/agents/managers/registry.py +254 -0
  45. django_cfg/apps/agents/managers/toolsets.py +496 -0
  46. django_cfg/apps/agents/migrations/0001_initial.py +286 -0
  47. django_cfg/apps/agents/migrations/__init__.py +5 -0
  48. django_cfg/apps/agents/models/__init__.py +15 -0
  49. django_cfg/apps/agents/models/execution.py +215 -0
  50. django_cfg/apps/agents/models/registry.py +220 -0
  51. django_cfg/apps/agents/models/toolsets.py +305 -0
  52. django_cfg/apps/agents/patterns/__init__.py +24 -0
  53. django_cfg/apps/agents/patterns/content_agents.py +234 -0
  54. django_cfg/apps/agents/toolsets/__init__.py +15 -0
  55. django_cfg/apps/agents/toolsets/cache_toolset.py +285 -0
  56. django_cfg/apps/agents/toolsets/django_toolset.py +220 -0
  57. django_cfg/apps/agents/toolsets/file_toolset.py +324 -0
  58. django_cfg/apps/agents/toolsets/orm_toolset.py +319 -0
  59. django_cfg/apps/agents/urls.py +46 -0
  60. django_cfg/apps/knowbase/README.md +150 -0
  61. django_cfg/apps/knowbase/__init__.py +27 -0
  62. django_cfg/apps/knowbase/admin/__init__.py +23 -0
  63. django_cfg/apps/knowbase/admin/archive_admin.py +857 -0
  64. django_cfg/apps/knowbase/admin/chat_admin.py +386 -0
  65. django_cfg/apps/knowbase/admin/document_admin.py +650 -0
  66. django_cfg/apps/knowbase/admin/external_data_admin.py +685 -0
  67. django_cfg/apps/knowbase/apps.py +81 -0
  68. django_cfg/apps/knowbase/config/README.md +176 -0
  69. django_cfg/apps/knowbase/config/__init__.py +51 -0
  70. django_cfg/apps/knowbase/config/constance_fields.py +186 -0
  71. django_cfg/apps/knowbase/config/constance_settings.py +200 -0
  72. django_cfg/apps/knowbase/config/settings.py +450 -0
  73. django_cfg/apps/knowbase/examples/__init__.py +3 -0
  74. django_cfg/apps/knowbase/examples/external_data_usage.py +191 -0
  75. django_cfg/apps/knowbase/management/__init__.py +0 -0
  76. django_cfg/apps/knowbase/management/commands/__init__.py +0 -0
  77. django_cfg/apps/knowbase/management/commands/knowbase_stats.py +158 -0
  78. django_cfg/apps/knowbase/management/commands/setup_knowbase.py +59 -0
  79. django_cfg/apps/knowbase/managers/__init__.py +22 -0
  80. django_cfg/apps/knowbase/managers/archive.py +426 -0
  81. django_cfg/apps/knowbase/managers/base.py +32 -0
  82. django_cfg/apps/knowbase/managers/chat.py +141 -0
  83. django_cfg/apps/knowbase/managers/document.py +203 -0
  84. django_cfg/apps/knowbase/managers/external_data.py +471 -0
  85. django_cfg/apps/knowbase/migrations/0001_initial.py +427 -0
  86. django_cfg/apps/knowbase/migrations/0002_archiveitem_archiveitemchunk_documentarchive_and_more.py +434 -0
  87. django_cfg/apps/knowbase/migrations/__init__.py +5 -0
  88. django_cfg/apps/knowbase/mixins/__init__.py +15 -0
  89. django_cfg/apps/knowbase/mixins/config.py +108 -0
  90. django_cfg/apps/knowbase/mixins/creator.py +81 -0
  91. django_cfg/apps/knowbase/mixins/examples/vehicle_model_example.py +199 -0
  92. django_cfg/apps/knowbase/mixins/external_data_mixin.py +813 -0
  93. django_cfg/apps/knowbase/mixins/service.py +362 -0
  94. django_cfg/apps/knowbase/models/__init__.py +41 -0
  95. django_cfg/apps/knowbase/models/archive.py +599 -0
  96. django_cfg/apps/knowbase/models/base.py +58 -0
  97. django_cfg/apps/knowbase/models/chat.py +157 -0
  98. django_cfg/apps/knowbase/models/document.py +267 -0
  99. django_cfg/apps/knowbase/models/external_data.py +376 -0
  100. django_cfg/apps/knowbase/serializers/__init__.py +68 -0
  101. django_cfg/apps/knowbase/serializers/archive_serializers.py +386 -0
  102. django_cfg/apps/knowbase/serializers/chat_serializers.py +137 -0
  103. django_cfg/apps/knowbase/serializers/document_serializers.py +94 -0
  104. django_cfg/apps/knowbase/serializers/external_data_serializers.py +256 -0
  105. django_cfg/apps/knowbase/serializers/public_serializers.py +74 -0
  106. django_cfg/apps/knowbase/services/__init__.py +40 -0
  107. django_cfg/apps/knowbase/services/archive/__init__.py +42 -0
  108. django_cfg/apps/knowbase/services/archive/archive_service.py +541 -0
  109. django_cfg/apps/knowbase/services/archive/chunking_service.py +791 -0
  110. django_cfg/apps/knowbase/services/archive/exceptions.py +52 -0
  111. django_cfg/apps/knowbase/services/archive/extraction_service.py +508 -0
  112. django_cfg/apps/knowbase/services/archive/vectorization_service.py +362 -0
  113. django_cfg/apps/knowbase/services/base.py +53 -0
  114. django_cfg/apps/knowbase/services/chat_service.py +239 -0
  115. django_cfg/apps/knowbase/services/document_service.py +144 -0
  116. django_cfg/apps/knowbase/services/embedding/__init__.py +43 -0
  117. django_cfg/apps/knowbase/services/embedding/async_processor.py +244 -0
  118. django_cfg/apps/knowbase/services/embedding/batch_processor.py +250 -0
  119. django_cfg/apps/knowbase/services/embedding/batch_result.py +61 -0
  120. django_cfg/apps/knowbase/services/embedding/models.py +229 -0
  121. django_cfg/apps/knowbase/services/embedding/processors.py +148 -0
  122. django_cfg/apps/knowbase/services/embedding/utils.py +176 -0
  123. django_cfg/apps/knowbase/services/prompt_builder.py +191 -0
  124. django_cfg/apps/knowbase/services/search_service.py +293 -0
  125. django_cfg/apps/knowbase/signals/__init__.py +21 -0
  126. django_cfg/apps/knowbase/signals/archive_signals.py +211 -0
  127. django_cfg/apps/knowbase/signals/chat_signals.py +37 -0
  128. django_cfg/apps/knowbase/signals/document_signals.py +143 -0
  129. django_cfg/apps/knowbase/signals/external_data_signals.py +157 -0
  130. django_cfg/apps/knowbase/tasks/__init__.py +39 -0
  131. django_cfg/apps/knowbase/tasks/archive_tasks.py +316 -0
  132. django_cfg/apps/knowbase/tasks/document_processing.py +341 -0
  133. django_cfg/apps/knowbase/tasks/external_data_tasks.py +341 -0
  134. django_cfg/apps/knowbase/tasks/maintenance.py +195 -0
  135. django_cfg/apps/knowbase/urls.py +43 -0
  136. django_cfg/apps/knowbase/utils/__init__.py +12 -0
  137. django_cfg/apps/knowbase/utils/chunk_settings.py +261 -0
  138. django_cfg/apps/knowbase/utils/text_processing.py +375 -0
  139. django_cfg/apps/knowbase/utils/validation.py +99 -0
  140. django_cfg/apps/knowbase/views/__init__.py +28 -0
  141. django_cfg/apps/knowbase/views/archive_views.py +469 -0
  142. django_cfg/apps/knowbase/views/base.py +49 -0
  143. django_cfg/apps/knowbase/views/chat_views.py +181 -0
  144. django_cfg/apps/knowbase/views/document_views.py +183 -0
  145. django_cfg/apps/knowbase/views/public_views.py +129 -0
  146. django_cfg/apps/leads/admin.py +70 -0
  147. django_cfg/apps/newsletter/admin.py +234 -0
  148. django_cfg/apps/newsletter/admin_filters.py +124 -0
  149. django_cfg/apps/support/admin.py +196 -0
  150. django_cfg/apps/support/admin_filters.py +71 -0
  151. django_cfg/apps/support/templates/support/chat/ticket_chat.html +1 -1
  152. django_cfg/apps/urls.py +5 -4
  153. django_cfg/cli/README.md +1 -1
  154. django_cfg/cli/commands/create_project.py +2 -2
  155. django_cfg/cli/commands/info.py +1 -1
  156. django_cfg/config.py +44 -0
  157. django_cfg/core/config.py +29 -82
  158. django_cfg/core/environment.py +1 -1
  159. django_cfg/core/generation.py +19 -107
  160. django_cfg/{integration.py → core/integration.py} +18 -16
  161. django_cfg/core/validation.py +1 -1
  162. django_cfg/management/__init__.py +1 -1
  163. django_cfg/management/commands/__init__.py +1 -1
  164. django_cfg/management/commands/auto_generate.py +482 -0
  165. django_cfg/management/commands/migrator.py +19 -101
  166. django_cfg/management/commands/test_email.py +1 -1
  167. django_cfg/middleware/README.md +0 -158
  168. django_cfg/middleware/__init__.py +0 -2
  169. django_cfg/middleware/user_activity.py +3 -3
  170. django_cfg/models/api.py +145 -0
  171. django_cfg/models/base.py +287 -0
  172. django_cfg/models/cache.py +4 -4
  173. django_cfg/models/constance.py +25 -88
  174. django_cfg/models/database.py +9 -9
  175. django_cfg/models/drf.py +3 -36
  176. django_cfg/models/email.py +163 -0
  177. django_cfg/models/environment.py +276 -0
  178. django_cfg/models/limits.py +1 -1
  179. django_cfg/models/logging.py +366 -0
  180. django_cfg/models/revolution.py +41 -2
  181. django_cfg/models/security.py +125 -0
  182. django_cfg/models/services.py +1 -1
  183. django_cfg/modules/__init__.py +2 -56
  184. django_cfg/modules/base.py +78 -52
  185. django_cfg/modules/django_currency/service.py +2 -2
  186. django_cfg/modules/django_email.py +2 -2
  187. django_cfg/modules/django_health.py +267 -0
  188. django_cfg/modules/django_llm/llm/client.py +91 -19
  189. django_cfg/modules/django_llm/translator/translator.py +2 -2
  190. django_cfg/modules/django_logger.py +2 -2
  191. django_cfg/modules/django_ngrok.py +2 -2
  192. django_cfg/modules/django_tasks.py +68 -3
  193. django_cfg/modules/django_telegram.py +3 -3
  194. django_cfg/modules/django_twilio/sendgrid_service.py +2 -2
  195. django_cfg/modules/django_twilio/service.py +2 -2
  196. django_cfg/modules/django_twilio/simple_service.py +2 -2
  197. django_cfg/modules/django_twilio/twilio_service.py +2 -2
  198. django_cfg/modules/django_unfold/__init__.py +69 -0
  199. django_cfg/modules/{unfold → django_unfold}/callbacks.py +23 -22
  200. django_cfg/modules/django_unfold/dashboard.py +278 -0
  201. django_cfg/modules/django_unfold/icons/README.md +145 -0
  202. django_cfg/modules/django_unfold/icons/__init__.py +12 -0
  203. django_cfg/modules/django_unfold/icons/constants.py +2851 -0
  204. django_cfg/modules/django_unfold/icons/generate_icons.py +486 -0
  205. django_cfg/modules/django_unfold/models/__init__.py +42 -0
  206. django_cfg/modules/django_unfold/models/config.py +601 -0
  207. django_cfg/modules/django_unfold/models/dashboard.py +206 -0
  208. django_cfg/modules/django_unfold/models/dropdown.py +40 -0
  209. django_cfg/modules/django_unfold/models/navigation.py +73 -0
  210. django_cfg/modules/django_unfold/models/tabs.py +25 -0
  211. django_cfg/modules/{unfold → django_unfold}/system_monitor.py +2 -2
  212. django_cfg/modules/django_unfold/utils.py +140 -0
  213. django_cfg/registry/__init__.py +23 -0
  214. django_cfg/registry/core.py +61 -0
  215. django_cfg/registry/exceptions.py +11 -0
  216. django_cfg/registry/modules.py +12 -0
  217. django_cfg/registry/services.py +26 -0
  218. django_cfg/registry/third_party.py +52 -0
  219. django_cfg/routing/__init__.py +19 -0
  220. django_cfg/routing/callbacks.py +198 -0
  221. django_cfg/routing/routers.py +48 -0
  222. django_cfg/templates/admin/layouts/dashboard_with_tabs.html +8 -9
  223. django_cfg/templatetags/__init__.py +0 -0
  224. django_cfg/templatetags/django_cfg.py +33 -0
  225. django_cfg/urls.py +33 -0
  226. django_cfg/utils/path_resolution.py +1 -1
  227. django_cfg/utils/smart_defaults.py +7 -61
  228. django_cfg/utils/toolkit.py +663 -0
  229. {django_cfg-1.1.82.dist-info → django_cfg-1.2.1.dist-info}/METADATA +83 -86
  230. django_cfg-1.2.1.dist-info/RECORD +441 -0
  231. django_cfg/archive/django_sample.zip +0 -0
  232. django_cfg/models/unfold.py +0 -271
  233. django_cfg/modules/unfold/__init__.py +0 -29
  234. django_cfg/modules/unfold/dashboard.py +0 -318
  235. django_cfg/pyproject.toml +0 -370
  236. django_cfg/routers.py +0 -83
  237. django_cfg-1.1.82.dist-info/RECORD +0 -278
  238. /django_cfg/{exceptions.py → core/exceptions.py} +0 -0
  239. /django_cfg/modules/{unfold → django_unfold}/models.py +0 -0
  240. /django_cfg/modules/{unfold → django_unfold}/tailwind.py +0 -0
  241. /django_cfg/{version_check.py → utils/version_check.py} +0 -0
  242. {django_cfg-1.1.82.dist-info → django_cfg-1.2.1.dist-info}/WHEEL +0 -0
  243. {django_cfg-1.1.82.dist-info → django_cfg-1.2.1.dist-info}/entry_points.txt +0 -0
  244. {django_cfg-1.1.82.dist-info → django_cfg-1.2.1.dist-info}/licenses/LICENSE +0 -0
@@ -0,0 +1,316 @@
1
+ """
2
+ Archive processing tasks with Dramatiq.
3
+ """
4
+
5
+ import dramatiq
6
+ import logging
7
+ import time
8
+ from typing import Dict, Any
9
+ from django.db import transaction
10
+ from django.utils import timezone
11
+ from django.contrib.auth import get_user_model
12
+
13
+ from ..models.archive import DocumentArchive, ArchiveItem, ArchiveItemChunk
14
+ from ..models.base import ProcessingStatus
15
+ from ..services.archive import (
16
+ DocumentArchiveService,
17
+ ArchiveVectorizationService,
18
+ ArchiveProcessingError
19
+ )
20
+
21
+ logger = logging.getLogger(__name__)
22
+ User = get_user_model()
23
+
24
+
25
@dramatiq.actor(
    queue_name="knowledge",
    max_retries=3,
    min_backoff=1000,  # 1 second
    max_backoff=30000,  # 30 seconds
    priority=5
)
def process_archive_task(archive_id: str, user_id: str) -> bool:
    """
    Process a document archive asynchronously.

    Args:
        archive_id: ID of the archive to process
        user_id: ID of the user who owns the archive

    Returns:
        True if the archive service reported success, False otherwise

    Raises:
        DocumentArchive.DoesNotExist: If no archive has the given ID
        User.DoesNotExist: If no user has the given ID
        ArchiveProcessingError: If the user does not own the archive
            (code "UNAUTHORIZED_ACCESS") or if any other error occurs while
            processing (code "PROCESSING_FAILED")
    """
    logger.info(f"Starting archive processing for archive {archive_id}")

    try:
        # .get() raises DoesNotExist rather than returning None, so no
        # separate "archive is None" check is needed after these lookups.
        archive = DocumentArchive.objects.all_users().get(pk=archive_id)
        user = User.objects.get(pk=user_id)

        # Debug logging
        logger.info(f"Retrieved archive: {archive}, type: {type(archive)}")
        logger.info(f"Archive ID: {archive.id}")
        logger.info(f"Archive file: {archive.archive_file}")

        # Verify user owns the archive
        if archive.user_id != user.id:
            raise ArchiveProcessingError(
                message=f"User {user_id} does not own archive {archive_id}",
                code="UNAUTHORIZED_ACCESS"
            )

        # Initialize services
        service = DocumentArchiveService(user=user)

        # Process the archive (no transaction.atomic here, to avoid nested
        # transaction conflicts with the service)
        success = service.process_archive(archive)

        if success:
            logger.info(f"Successfully processed archive {archive_id}")
            return True

        logger.error(f"Failed to process archive {archive_id}")
        return False

    except DocumentArchive.DoesNotExist:
        logger.error(f"Archive {archive_id} not found")
        raise
    except User.DoesNotExist:
        logger.error(f"User {user_id} not found")
        raise
    except ArchiveProcessingError as e:
        # Already a domain error: re-raise untouched so its original code
        # (e.g. UNAUTHORIZED_ACCESS) is not overwritten by PROCESSING_FAILED.
        logger.error(f"Error processing archive {archive_id}: {str(e)}")
        raise
    except Exception as e:
        logger.error(f"Error processing archive {archive_id}: {str(e)}")
        # Chain the cause so the original traceback is preserved.
        raise ArchiveProcessingError(
            message=f"Archive processing failed: {str(e)}",
            code="PROCESSING_FAILED"
        ) from e
96
+
97
+
98
@dramatiq.actor(
    queue_name="knowledge",
    max_retries=2,
    min_backoff=2000,  # 2 seconds
    max_backoff=60000,  # 60 seconds
    priority=4
)
def vectorize_archive_items_task(archive_id: str, user_id: str) -> int:
    """
    Vectorize all items in a document archive.

    Args:
        archive_id: ID of the archive to vectorize
        user_id: ID of the user who owns the archive

    Returns:
        Number of items vectorized, as reported by the vectorization service

    Raises:
        DocumentArchive.DoesNotExist: If no archive has the given ID
        User.DoesNotExist: If no user has the given ID
        ArchiveProcessingError: If the user does not own the archive
            (code "UNAUTHORIZED_ACCESS") or if any other error occurs during
            vectorization (code "VECTORIZATION_FAILED")
    """
    logger.info(f"Starting vectorization for archive {archive_id}")

    try:
        # Get archive and user
        archive = DocumentArchive.objects.all_users().get(pk=archive_id)
        user = User.objects.get(pk=user_id)

        # Verify user owns the archive
        if archive.user_id != user.id:
            raise ArchiveProcessingError(
                message=f"User {user_id} does not own archive {archive_id}",
                code="UNAUTHORIZED_ACCESS"
            )

        # Initialize vectorization service
        service = ArchiveVectorizationService(user=user)

        # Vectorize archive items
        vectorized_count = service.vectorize_archive_items(archive)

        logger.info(f"Successfully vectorized {vectorized_count} items for archive {archive_id}")
        return vectorized_count

    except DocumentArchive.DoesNotExist:
        logger.error(f"Archive {archive_id} not found")
        raise
    except User.DoesNotExist:
        logger.error(f"User {user_id} not found")
        raise
    except ArchiveProcessingError as e:
        # Already a domain error: re-raise untouched so its original code
        # (e.g. UNAUTHORIZED_ACCESS) is not overwritten by VECTORIZATION_FAILED.
        logger.error(f"Error vectorizing archive {archive_id}: {str(e)}")
        raise
    except Exception as e:
        logger.error(f"Error vectorizing archive {archive_id}: {str(e)}")
        # Chain the cause so the original traceback is preserved.
        raise ArchiveProcessingError(
            message=f"Archive vectorization failed: {str(e)}",
            code="VECTORIZATION_FAILED"
        ) from e
154
+
155
+
156
@dramatiq.actor(
    queue_name="knowledge",
    max_retries=1,
    priority=2
)
def cleanup_failed_archives_task(days_old: int = 7) -> int:
    """
    Clean up failed archives older than specified days.

    Args:
        days_old: Age threshold for cleanup (default: 7 days)

    Returns:
        Number of archives cleaned up (cascaded related rows are not counted)

    Raises:
        Exception: Any error raised while querying or deleting is logged and
            re-raised unchanged
    """
    # Local import keeps this block self-contained; the previous code relied
    # on django.utils.timezone happening to re-export datetime.timedelta.
    from datetime import timedelta

    logger.info(f"Starting cleanup of failed archives older than {days_old} days")

    try:
        cutoff_date = timezone.now() - timedelta(days=days_old)

        # Find failed archives older than cutoff.
        # NOTE(review): other tasks in this module go through
        # `objects.all_users()`; confirm the default manager used here is not
        # filtered to a single user.
        failed_archives = DocumentArchive.objects.filter(
            processing_status=ProcessingStatus.FAILED,
            created_at__lt=cutoff_date
        )

        # QuerySet.delete() returns (total_rows, {model_label: rows}). The
        # total includes cascaded related objects, so read the per-model entry
        # to report archives only.
        _, deleted_by_model = failed_archives.delete()
        deleted_count = deleted_by_model.get(DocumentArchive._meta.label, 0)

        logger.info(f"Cleaned up {deleted_count} failed archives")
        return deleted_count

    except Exception as e:
        logger.error(f"Error during archive cleanup: {str(e)}")
        raise
193
+
194
+
195
@dramatiq.actor(
    queue_name="knowledge",
    max_retries=1,
    priority=1
)
def generate_archive_statistics_task(user_id: str) -> Dict[str, Any]:
    """
    Build a summary of one user's archives.

    Args:
        user_id: ID of the user whose archives are summarised

    Returns:
        Dictionary with the total archive count, one count per processing
        status (completed, pending, processing, failed) and the summed
        items, chunks and cost over all of the user's archives

    Raises:
        User.DoesNotExist: If no user has the given ID
        Exception: Any other error is logged and re-raised unchanged
    """
    logger.info(f"Generating archive statistics for user {user_id}")

    try:
        owner = User.objects.get(pk=user_id)
        owned_archives = DocumentArchive.objects.filter(user=owner)

        # Overall count first, then one COUNT query per status of interest.
        stats = {'total_archives': owned_archives.count()}
        per_status_keys = (
            ('completed_archives', ProcessingStatus.COMPLETED),
            ('pending_archives', ProcessingStatus.PENDING),
            ('processing_archives', ProcessingStatus.PROCESSING),
            ('failed_archives', ProcessingStatus.FAILED),
        )
        for stat_key, status in per_status_keys:
            stats[stat_key] = owned_archives.filter(processing_status=status).count()

        # Totals are summed in Python over the archive instances.
        stats['total_items'] = sum(entry.total_items for entry in owned_archives)
        stats['total_chunks'] = sum(entry.total_chunks for entry in owned_archives)
        stats['total_cost'] = sum(entry.total_cost_usd for entry in owned_archives)

        logger.info(f"Generated statistics for user {user_id}: {stats}")
        return stats

    except User.DoesNotExist:
        logger.error(f"User {user_id} not found")
        raise
    except Exception as e:
        logger.error(f"Error generating statistics for user {user_id}: {str(e)}")
        raise
239
+
240
+
241
@dramatiq.actor(
    queue_name="knowledge",
    max_retries=1,
    priority=1
)
def archive_health_check_task() -> Dict[str, Any]:
    """
    Run a health check over the archive tables and stored files.

    Returns:
        Dictionary with the total archive count, healthy/unhealthy counts
        (an archive is unhealthy when its non-empty file_path does not exist
        on disk), orphaned item and chunk counts, the number of archives per
        processing status, and an ISO-formatted timestamp

    Raises:
        Exception: Any error is logged and re-raised unchanged
    """
    import os

    logger.info("Starting archive system health check")

    try:
        # Simple counts; the first one doubles as a connectivity probe.
        total_archives = DocumentArchive.objects.count()
        orphaned_items = ArchiveItem.objects.filter(archive__isnull=True).count()
        orphaned_chunks = ArchiveItemChunk.objects.filter(item__isnull=True).count()

        # One COUNT query per processing status, keyed by the status value.
        status_counts = {
            status.value: DocumentArchive.objects.filter(processing_status=status).count()
            for status in ProcessingStatus
        }

        # Archives that claim to have a file but whose path is missing on disk.
        archives_with_files = (
            DocumentArchive.objects
            .exclude(file_path__isnull=True)
            .exclude(file_path='')
        )
        unhealthy_archives = sum(
            1 for candidate in archives_with_files
            if not os.path.exists(candidate.file_path)
        )

        health_data = {
            'total_checked': total_archives,
            'healthy_archives': total_archives - unhealthy_archives,
            'unhealthy_archives': unhealthy_archives,
            'orphaned_items': orphaned_items,
            'orphaned_chunks': orphaned_chunks,
            'status_distribution': status_counts,
            'timestamp': timezone.now().isoformat()
        }

        logger.info(f"Health check completed: {health_data}")
        return health_data

    except Exception as e:
        logger.error(f"Error during health check: {str(e)}")
        raise
296
+
297
+
298
+ # Test task for development
299
@dramatiq.actor(
    queue_name="knowledge",
    max_retries=0,
    priority=1
)
def test_archive_task(message: str = "Hello from archive tasks!") -> str:
    """
    Development smoke-test task for the archive queue.

    Logs the message, sleeps for one second to imitate work, and echoes the
    message back with a "Processed: " prefix.

    Args:
        message: Test message to process

    Returns:
        The message prefixed with "Processed: "
    """
    logger.info(f"Test archive task executed with message: {message}")

    # Simulate some work
    simulated_work_seconds = 1
    time.sleep(simulated_work_seconds)

    processed = f"Processed: {message}"
    return processed
@@ -0,0 +1,341 @@
1
+ """
2
+ Document processing tasks with Dramatiq.
3
+ """
4
+
5
+ import dramatiq
6
+ import logging
7
+ import time
8
+ from typing import Dict, List, Any, Tuple, Optional
9
+ from django.db import transaction
10
+ from django.utils import timezone
11
+ from django_cfg.modules.django_llm.llm.client import LLMClient
12
+ from django.conf import settings
13
+
14
+ from ..models import Document, DocumentChunk, ProcessingStatus
15
+ from ..utils.text_processing import TextProcessor, SemanticChunker
16
+ from ..services.embedding import process_document_chunks_optimized
17
+ from ..utils.chunk_settings import get_chunking_params_for_type, get_embedding_model
18
+
19
+ logger = logging.getLogger(__name__)
20
+
21
+
22
@dramatiq.actor(
    queue_name="knowledge",
    max_retries=3,
    min_backoff=1000,  # 1 second
    max_backoff=30000,  # 30 seconds
    priority=5
)
def process_document_async(
    document_id: str,
    chunk_size: Optional[int] = None,
    chunk_overlap: Optional[int] = None,
    embedding_model: Optional[str] = None
) -> Dict[str, Any]:
    """
    Process document asynchronously with full pipeline.

    The pipeline cleans the document text, splits it into chunks, stores the
    chunks with a placeholder embedding, generates the real embeddings, and
    finally records the totals on the document. All of this runs inside one
    database transaction with the document row locked.

    Args:
        document_id: Document UUID to process
        chunk_size: Maximum chunk size in characters (uses Constance setting if None)
        chunk_overlap: Overlap between chunks (uses Constance setting if None;
            an explicit 0 is honoured and disables overlap)
        embedding_model: Model to use for embeddings (uses Constance setting if None)

    Returns:
        Processing results with statistics. If the document does not exist,
        a dictionary with status FAILED and an "error" message is returned
        instead of raising.

    Raises:
        Exception: Any other failure marks the document as FAILED (with the
            error text stored on it), is logged with its traceback, and is
            re-raised unchanged
    """
    start_time = time.time()

    try:
        with transaction.atomic():
            # Load document
            document = Document.objects.select_for_update().get(
                id=document_id
            )

            # Update processing status
            document.processing_status = ProcessingStatus.PROCESSING
            document.processing_started_at = timezone.now()
            document.save(update_fields=['processing_status', 'processing_started_at'])

            logger.info(f"Starting document processing: {document_id}")

            # Get dynamic settings from Constance
            chunking_params = get_chunking_params_for_type('document')
            final_chunk_size = chunk_size or chunking_params['chunk_size']
            # Compare against None explicitly: 0 is a valid overlap and must
            # not be silently replaced by the configured default.
            final_chunk_overlap = (
                chunk_overlap if chunk_overlap is not None
                else chunking_params['overlap']
            )
            final_embedding_model = embedding_model or get_embedding_model()

            logger.info(f"Using dynamic settings: chunk_size={final_chunk_size}, overlap={final_chunk_overlap}, model={final_embedding_model}")

            # Initialize services
            text_processor = TextProcessor()
            chunker = SemanticChunker(
                chunk_size=final_chunk_size,
                overlap=final_chunk_overlap
            )

            # Step 1: Clean and preprocess text
            cleaned_content = text_processor.clean_text(document.content)

            # Step 2: Create semantic chunks
            chunks = chunker.create_chunks(cleaned_content)

            logger.info(f"Created {len(chunks)} chunks for document {document_id}")

            # Step 3: Create chunks without embeddings first
            chunk_objects = []
            for idx, chunk_text in enumerate(chunks):
                chunk = DocumentChunk(
                    document=document,
                    user_id=document.user_id,
                    content=chunk_text,
                    chunk_index=idx,
                    character_count=len(chunk_text),
                    embedding_model=final_embedding_model,
                    embedding=[0.0] * 1536,  # Temporary zero vector, will be replaced
                    metadata={
                        "processed_at": timezone.now().isoformat(),
                        "chunk_size": len(chunk_text),
                        "overlap_size": final_chunk_overlap if idx > 0 else 0
                    }
                )
                chunk_objects.append(chunk)

            # Bulk create chunks for performance
            DocumentChunk.objects.bulk_create(
                chunk_objects,
                batch_size=100
            )

            # Step 4: Generate embeddings using optimized processor
            created_chunks = DocumentChunk.objects.filter(document=document).order_by('chunk_index')
            chunks_list = list(created_chunks)
            logger.info(f"🔍 About to process {len(chunks_list)} chunks for embeddings")

            embedding_result = process_document_chunks_optimized(chunks_list)

            logger.info(f"🔍 Embedding result: {embedding_result.successful_chunks}/{embedding_result.total_chunks}")

            total_tokens = embedding_result.total_tokens
            total_cost = embedding_result.total_cost

            logger.info(
                f"Optimized embedding processing: {embedding_result.successful_chunks}/{embedding_result.total_chunks} chunks, "
                f"{total_tokens} tokens, ${total_cost:.4f} cost, {embedding_result.processing_time:.2f}s"
            )

            # Step 5: Update document status
            processing_time = time.time() - start_time
            document.processing_status = ProcessingStatus.COMPLETED
            document.processing_completed_at = timezone.now()
            document.chunks_count = embedding_result.total_chunks
            document.total_tokens = total_tokens
            document.total_cost_usd = total_cost
            document.save(update_fields=[
                'processing_status', 'processing_completed_at', 'chunks_count',
                'total_tokens', 'total_cost_usd'
            ])

            return {
                "document_id": str(document.id),
                "status": document.processing_status.value,
                "chunks_count": document.chunks_count,
                "total_tokens": document.total_tokens,
                "total_cost_usd": document.total_cost_usd,
                "processing_time": processing_time,
                "errors": embedding_result.errors
            }

    except Document.DoesNotExist:
        logger.error(f"Document {document_id} not found.")
        return {
            "document_id": document_id,
            "status": ProcessingStatus.FAILED.value,
            "error": f"Document {document_id} not found."
        }
    except Exception as exc:
        # Re-fetch outside the failed atomic block to record the failure.
        document = Document.objects.filter(id=document_id).first()
        if document:
            document.processing_status = ProcessingStatus.FAILED
            document.processing_completed_at = timezone.now()
            document.processing_error = str(exc)
            document.save(update_fields=['processing_status', 'processing_completed_at', 'processing_error'])
        logger.error(f"Document processing failed for {document_id}: {exc}", exc_info=True)
        raise
166
+
167
+
168
def generate_embeddings_batch(
    chunks: List[str],
    document_id: str,
    embedding_model: str = "text-embedding-ada-002",
    batch_size: int = 50
) -> List[Tuple[str, List[float], int, float]]:
    """
    Embed a list of text chunks one at a time, grouped into batches.

    Each chunk is sent to the LLM client individually, with a 0.1 second
    pause after every request and a 1 second pause between batches. When the
    response is empty, or reports zero tokens or zero cost, the token count
    and cost are computed through the client instead.

    Args:
        chunks: List of text chunks
        document_id: Parent document ID (used for logging only)
        embedding_model: Model to use for embeddings
        batch_size: Number of chunks per batch

    Returns:
        List of (chunk_text, embedding, tokens, cost) tuples, in input order

    Raises:
        Exception: Any error is logged and re-raised unchanged
    """
    try:
        from django_cfg.apps.knowbase.config.settings import (
            get_openai_api_key,
            get_openrouter_api_key,
            get_cache_settings,
        )

        cache_cfg = get_cache_settings()
        client = LLMClient(
            apikey_openai=get_openai_api_key(),
            apikey_openrouter=get_openrouter_api_key(),
            cache_dir=cache_cfg.cache_dir,
            cache_ttl=cache_cfg.cache_ttl,
            max_cache_size=cache_cfg.max_cache_size
        )

        embedded = []
        chunk_total = len(chunks)

        # Process in batches to avoid rate limits
        for batch_start in range(0, chunk_total, batch_size):
            batch_end = batch_start + batch_size

            for text in chunks[batch_start:batch_end]:
                # Generate embedding (sync call for simplicity)
                response = client.generate_embedding(text, embedding_model)

                # Take vector, tokens and cost from the response when there is one
                if response:
                    vector = response.embedding
                    tokens = response.tokens
                    cost = response.cost
                else:
                    vector, tokens, cost = [], 0, 0.0

                # Fallback to manual calculation if needed
                if tokens == 0:
                    tokens = client.count_tokens(text, embedding_model)
                if cost == 0.0:
                    cost = client.estimate_cost(embedding_model, tokens, 0)

                embedded.append((text, vector, tokens, cost))

                # Small delay between requests to respect rate limits
                time.sleep(0.1)

            # Longer delay between batches (skipped after the final batch)
            if batch_end < chunk_total:
                time.sleep(1.0)

        logger.info(f"Generated {len(embedded)} embeddings for document {document_id}")
        return embedded

    except Exception as exc:
        logger.error(f"Batch embedding generation failed: {exc}")
        raise
239
+
240
+
241
@dramatiq.actor(
    queue_name="knowledge",
    max_retries=2,
    priority=7  # Higher priority for reprocessing
)
def reprocess_document_chunks(
    document_id: str,
    new_chunk_size: int = None,
    new_embedding_model: str = None
) -> Dict[str, Any]:
    """
    Reprocess existing document with new parameters.

    Deletes the document's existing chunks, resets its processing
    bookkeeping to a pending state, and then runs ``process_document_async``
    for it.

    Args:
        document_id: Document to reprocess
        new_chunk_size: New chunk size (optional, falls back to 1000)
        new_embedding_model: New embedding model (optional, falls back to
            "text-embedding-ada-002")

    Returns:
        Whatever ``process_document_async`` returns for this document

    Raises:
        Exception: Any failure (including a missing document) is logged and
            re-raised unchanged.
    """
    try:
        # Chunk deletion and the status reset are committed together.
        with transaction.atomic():
            document = Document.objects.get(id=document_id)

            # Delete existing chunks
            DocumentChunk.objects.filter(document=document).delete()

            # Reset every field written by save() below, so that no
            # timestamps or cost from the previous run survive the reset.
            # NOTE(review): assumes processing_started_at and
            # processing_completed_at are nullable -- confirm against the
            # Document model.
            document.processing_status = ProcessingStatus.PENDING
            document.processing_started_at = None
            document.processing_completed_at = None
            document.processing_error = ""
            document.chunks_count = 0
            document.total_tokens = 0
            document.total_cost_usd = 0.0
            document.save(update_fields=[
                'processing_status', 'processing_started_at', 'processing_completed_at',
                'processing_error', 'chunks_count', 'total_tokens', 'total_cost_usd'
            ])

        # Trigger reprocessing once the reset has been committed. This is a
        # direct call, so its result is returned to the caller.
        return process_document_async(
            document_id=document_id,
            chunk_size=new_chunk_size or 1000,
            embedding_model=new_embedding_model or "text-embedding-ada-002"
        )

    except Exception as exc:
        # Include the traceback, matching the document-processing handler.
        logger.error(f"Reprocessing failed for {document_id}: {exc}", exc_info=True)
        raise
291
+
292
+
293
@dramatiq.actor(
    queue_name="knowledge",
    max_retries=2,
    priority=4
)
def optimize_document_embeddings(document_id: str) -> Dict[str, Any]:
    """
    Post-processing optimization for document embeddings.

    Runs ANALYZE on the chunks table and, when the ``embedding_cosine_idx``
    index is present, rebuilds it with REINDEX.

    Args:
        document_id: Document to optimize (used for logging and the result)

    Returns:
        Dict with "status", "document_id" and an ISO-formatted "timestamp"

    Raises:
        Exception: Any database error is logged and re-raised unchanged.
    """
    try:
        from django.db import connection

        with connection.cursor() as cur:
            # Refresh planner statistics unconditionally
            cur.execute("ANALYZE django_cfg_knowbase_document_chunks;")

            # Look the index up first so REINDEX is only issued when it exists
            cur.execute("""
                SELECT EXISTS (
                    SELECT FROM pg_indexes
                    WHERE indexname = 'embedding_cosine_idx'
                );
            """)
            (has_index,) = cur.fetchone()

            if not has_index:
                logger.warning("embedding_cosine_idx index does not exist, skipping reindex")
            else:
                cur.execute("REINDEX INDEX embedding_cosine_idx;")
                logger.debug("Reindexed embedding_cosine_idx")

        logger.info(f"Optimized embeddings for document {document_id}")

        outcome = {
            "status": "optimized",
            "document_id": document_id,
            "timestamp": timezone.now().isoformat()
        }
        return outcome

    except Exception as exc:
        logger.error(f"Embedding optimization failed for {document_id}: {exc}")
        raise