django-cfg 1.1.82__py3-none-any.whl → 1.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (244)
  1. django_cfg/__init__.py +20 -448
  2. django_cfg/apps/accounts/README.md +3 -3
  3. django_cfg/apps/accounts/admin/__init__.py +0 -2
  4. django_cfg/apps/accounts/admin/activity.py +2 -9
  5. django_cfg/apps/accounts/admin/filters.py +0 -42
  6. django_cfg/apps/accounts/admin/inlines.py +8 -8
  7. django_cfg/apps/accounts/admin/otp.py +5 -5
  8. django_cfg/apps/accounts/admin/registration_source.py +1 -8
  9. django_cfg/apps/accounts/admin/user.py +12 -20
  10. django_cfg/apps/accounts/managers/user_manager.py +2 -129
  11. django_cfg/apps/accounts/migrations/0006_remove_twilioresponse_otp_secret_and_more.py +46 -0
  12. django_cfg/apps/accounts/models.py +3 -123
  13. django_cfg/apps/accounts/serializers/otp.py +40 -44
  14. django_cfg/apps/accounts/serializers/profile.py +0 -2
  15. django_cfg/apps/accounts/services/otp_service.py +98 -186
  16. django_cfg/apps/accounts/signals.py +25 -15
  17. django_cfg/apps/accounts/utils/auth_email_service.py +84 -0
  18. django_cfg/apps/accounts/views/otp.py +35 -36
  19. django_cfg/apps/agents/README.md +129 -0
  20. django_cfg/apps/agents/__init__.py +68 -0
  21. django_cfg/apps/agents/admin/__init__.py +17 -0
  22. django_cfg/apps/agents/admin/execution_admin.py +460 -0
  23. django_cfg/apps/agents/admin/registry_admin.py +360 -0
  24. django_cfg/apps/agents/admin/toolsets_admin.py +482 -0
  25. django_cfg/apps/agents/apps.py +29 -0
  26. django_cfg/apps/agents/core/__init__.py +20 -0
  27. django_cfg/apps/agents/core/agent.py +281 -0
  28. django_cfg/apps/agents/core/dependencies.py +154 -0
  29. django_cfg/apps/agents/core/exceptions.py +66 -0
  30. django_cfg/apps/agents/core/models.py +106 -0
  31. django_cfg/apps/agents/core/orchestrator.py +391 -0
  32. django_cfg/apps/agents/examples/__init__.py +3 -0
  33. django_cfg/apps/agents/examples/simple_example.py +161 -0
  34. django_cfg/apps/agents/integration/__init__.py +14 -0
  35. django_cfg/apps/agents/integration/middleware.py +80 -0
  36. django_cfg/apps/agents/integration/registry.py +345 -0
  37. django_cfg/apps/agents/integration/signals.py +50 -0
  38. django_cfg/apps/agents/management/__init__.py +3 -0
  39. django_cfg/apps/agents/management/commands/__init__.py +3 -0
  40. django_cfg/apps/agents/management/commands/create_agent.py +365 -0
  41. django_cfg/apps/agents/management/commands/orchestrator_status.py +191 -0
  42. django_cfg/apps/agents/managers/__init__.py +23 -0
  43. django_cfg/apps/agents/managers/execution.py +236 -0
  44. django_cfg/apps/agents/managers/registry.py +254 -0
  45. django_cfg/apps/agents/managers/toolsets.py +496 -0
  46. django_cfg/apps/agents/migrations/0001_initial.py +286 -0
  47. django_cfg/apps/agents/migrations/__init__.py +5 -0
  48. django_cfg/apps/agents/models/__init__.py +15 -0
  49. django_cfg/apps/agents/models/execution.py +215 -0
  50. django_cfg/apps/agents/models/registry.py +220 -0
  51. django_cfg/apps/agents/models/toolsets.py +305 -0
  52. django_cfg/apps/agents/patterns/__init__.py +24 -0
  53. django_cfg/apps/agents/patterns/content_agents.py +234 -0
  54. django_cfg/apps/agents/toolsets/__init__.py +15 -0
  55. django_cfg/apps/agents/toolsets/cache_toolset.py +285 -0
  56. django_cfg/apps/agents/toolsets/django_toolset.py +220 -0
  57. django_cfg/apps/agents/toolsets/file_toolset.py +324 -0
  58. django_cfg/apps/agents/toolsets/orm_toolset.py +319 -0
  59. django_cfg/apps/agents/urls.py +46 -0
  60. django_cfg/apps/knowbase/README.md +150 -0
  61. django_cfg/apps/knowbase/__init__.py +27 -0
  62. django_cfg/apps/knowbase/admin/__init__.py +23 -0
  63. django_cfg/apps/knowbase/admin/archive_admin.py +857 -0
  64. django_cfg/apps/knowbase/admin/chat_admin.py +386 -0
  65. django_cfg/apps/knowbase/admin/document_admin.py +650 -0
  66. django_cfg/apps/knowbase/admin/external_data_admin.py +685 -0
  67. django_cfg/apps/knowbase/apps.py +81 -0
  68. django_cfg/apps/knowbase/config/README.md +176 -0
  69. django_cfg/apps/knowbase/config/__init__.py +51 -0
  70. django_cfg/apps/knowbase/config/constance_fields.py +186 -0
  71. django_cfg/apps/knowbase/config/constance_settings.py +200 -0
  72. django_cfg/apps/knowbase/config/settings.py +444 -0
  73. django_cfg/apps/knowbase/examples/__init__.py +3 -0
  74. django_cfg/apps/knowbase/examples/external_data_usage.py +191 -0
  75. django_cfg/apps/knowbase/management/__init__.py +0 -0
  76. django_cfg/apps/knowbase/management/commands/__init__.py +0 -0
  77. django_cfg/apps/knowbase/management/commands/knowbase_stats.py +158 -0
  78. django_cfg/apps/knowbase/management/commands/setup_knowbase.py +59 -0
  79. django_cfg/apps/knowbase/managers/__init__.py +22 -0
  80. django_cfg/apps/knowbase/managers/archive.py +426 -0
  81. django_cfg/apps/knowbase/managers/base.py +32 -0
  82. django_cfg/apps/knowbase/managers/chat.py +141 -0
  83. django_cfg/apps/knowbase/managers/document.py +203 -0
  84. django_cfg/apps/knowbase/managers/external_data.py +471 -0
  85. django_cfg/apps/knowbase/migrations/0001_initial.py +427 -0
  86. django_cfg/apps/knowbase/migrations/0002_archiveitem_archiveitemchunk_documentarchive_and_more.py +434 -0
  87. django_cfg/apps/knowbase/migrations/__init__.py +5 -0
  88. django_cfg/apps/knowbase/mixins/__init__.py +15 -0
  89. django_cfg/apps/knowbase/mixins/config.py +108 -0
  90. django_cfg/apps/knowbase/mixins/creator.py +81 -0
  91. django_cfg/apps/knowbase/mixins/examples/vehicle_model_example.py +199 -0
  92. django_cfg/apps/knowbase/mixins/external_data_mixin.py +813 -0
  93. django_cfg/apps/knowbase/mixins/service.py +362 -0
  94. django_cfg/apps/knowbase/models/__init__.py +41 -0
  95. django_cfg/apps/knowbase/models/archive.py +599 -0
  96. django_cfg/apps/knowbase/models/base.py +58 -0
  97. django_cfg/apps/knowbase/models/chat.py +157 -0
  98. django_cfg/apps/knowbase/models/document.py +267 -0
  99. django_cfg/apps/knowbase/models/external_data.py +376 -0
  100. django_cfg/apps/knowbase/serializers/__init__.py +68 -0
  101. django_cfg/apps/knowbase/serializers/archive_serializers.py +386 -0
  102. django_cfg/apps/knowbase/serializers/chat_serializers.py +137 -0
  103. django_cfg/apps/knowbase/serializers/document_serializers.py +94 -0
  104. django_cfg/apps/knowbase/serializers/external_data_serializers.py +256 -0
  105. django_cfg/apps/knowbase/serializers/public_serializers.py +74 -0
  106. django_cfg/apps/knowbase/services/__init__.py +40 -0
  107. django_cfg/apps/knowbase/services/archive/__init__.py +42 -0
  108. django_cfg/apps/knowbase/services/archive/archive_service.py +541 -0
  109. django_cfg/apps/knowbase/services/archive/chunking_service.py +791 -0
  110. django_cfg/apps/knowbase/services/archive/exceptions.py +52 -0
  111. django_cfg/apps/knowbase/services/archive/extraction_service.py +508 -0
  112. django_cfg/apps/knowbase/services/archive/vectorization_service.py +362 -0
  113. django_cfg/apps/knowbase/services/base.py +53 -0
  114. django_cfg/apps/knowbase/services/chat_service.py +239 -0
  115. django_cfg/apps/knowbase/services/document_service.py +144 -0
  116. django_cfg/apps/knowbase/services/embedding/__init__.py +43 -0
  117. django_cfg/apps/knowbase/services/embedding/async_processor.py +244 -0
  118. django_cfg/apps/knowbase/services/embedding/batch_processor.py +250 -0
  119. django_cfg/apps/knowbase/services/embedding/batch_result.py +61 -0
  120. django_cfg/apps/knowbase/services/embedding/models.py +229 -0
  121. django_cfg/apps/knowbase/services/embedding/processors.py +148 -0
  122. django_cfg/apps/knowbase/services/embedding/utils.py +176 -0
  123. django_cfg/apps/knowbase/services/prompt_builder.py +191 -0
  124. django_cfg/apps/knowbase/services/search_service.py +293 -0
  125. django_cfg/apps/knowbase/signals/__init__.py +21 -0
  126. django_cfg/apps/knowbase/signals/archive_signals.py +211 -0
  127. django_cfg/apps/knowbase/signals/chat_signals.py +37 -0
  128. django_cfg/apps/knowbase/signals/document_signals.py +143 -0
  129. django_cfg/apps/knowbase/signals/external_data_signals.py +157 -0
  130. django_cfg/apps/knowbase/tasks/__init__.py +39 -0
  131. django_cfg/apps/knowbase/tasks/archive_tasks.py +316 -0
  132. django_cfg/apps/knowbase/tasks/document_processing.py +341 -0
  133. django_cfg/apps/knowbase/tasks/external_data_tasks.py +341 -0
  134. django_cfg/apps/knowbase/tasks/maintenance.py +195 -0
  135. django_cfg/apps/knowbase/urls.py +43 -0
  136. django_cfg/apps/knowbase/utils/__init__.py +12 -0
  137. django_cfg/apps/knowbase/utils/chunk_settings.py +261 -0
  138. django_cfg/apps/knowbase/utils/text_processing.py +375 -0
  139. django_cfg/apps/knowbase/utils/validation.py +99 -0
  140. django_cfg/apps/knowbase/views/__init__.py +28 -0
  141. django_cfg/apps/knowbase/views/archive_views.py +469 -0
  142. django_cfg/apps/knowbase/views/base.py +49 -0
  143. django_cfg/apps/knowbase/views/chat_views.py +181 -0
  144. django_cfg/apps/knowbase/views/document_views.py +183 -0
  145. django_cfg/apps/knowbase/views/public_views.py +129 -0
  146. django_cfg/apps/leads/admin.py +70 -0
  147. django_cfg/apps/newsletter/admin.py +234 -0
  148. django_cfg/apps/newsletter/admin_filters.py +124 -0
  149. django_cfg/apps/support/admin.py +196 -0
  150. django_cfg/apps/support/admin_filters.py +71 -0
  151. django_cfg/apps/support/templates/support/chat/ticket_chat.html +1 -1
  152. django_cfg/apps/urls.py +5 -4
  153. django_cfg/cli/README.md +1 -1
  154. django_cfg/cli/commands/create_project.py +2 -2
  155. django_cfg/cli/commands/info.py +1 -1
  156. django_cfg/config.py +44 -0
  157. django_cfg/core/config.py +29 -82
  158. django_cfg/core/environment.py +1 -1
  159. django_cfg/core/generation.py +19 -107
  160. django_cfg/{integration.py → core/integration.py} +18 -16
  161. django_cfg/core/validation.py +1 -1
  162. django_cfg/management/__init__.py +1 -1
  163. django_cfg/management/commands/__init__.py +1 -1
  164. django_cfg/management/commands/auto_generate.py +482 -0
  165. django_cfg/management/commands/migrator.py +19 -101
  166. django_cfg/management/commands/test_email.py +1 -1
  167. django_cfg/middleware/README.md +0 -158
  168. django_cfg/middleware/__init__.py +0 -2
  169. django_cfg/middleware/user_activity.py +3 -3
  170. django_cfg/models/api.py +145 -0
  171. django_cfg/models/base.py +287 -0
  172. django_cfg/models/cache.py +4 -4
  173. django_cfg/models/constance.py +25 -88
  174. django_cfg/models/database.py +9 -9
  175. django_cfg/models/drf.py +3 -36
  176. django_cfg/models/email.py +163 -0
  177. django_cfg/models/environment.py +276 -0
  178. django_cfg/models/limits.py +1 -1
  179. django_cfg/models/logging.py +366 -0
  180. django_cfg/models/revolution.py +41 -2
  181. django_cfg/models/security.py +125 -0
  182. django_cfg/models/services.py +1 -1
  183. django_cfg/modules/__init__.py +2 -56
  184. django_cfg/modules/base.py +78 -52
  185. django_cfg/modules/django_currency/service.py +2 -2
  186. django_cfg/modules/django_email.py +2 -2
  187. django_cfg/modules/django_health.py +267 -0
  188. django_cfg/modules/django_llm/llm/client.py +79 -17
  189. django_cfg/modules/django_llm/translator/translator.py +2 -2
  190. django_cfg/modules/django_logger.py +2 -2
  191. django_cfg/modules/django_ngrok.py +2 -2
  192. django_cfg/modules/django_tasks.py +68 -3
  193. django_cfg/modules/django_telegram.py +3 -3
  194. django_cfg/modules/django_twilio/sendgrid_service.py +2 -2
  195. django_cfg/modules/django_twilio/service.py +2 -2
  196. django_cfg/modules/django_twilio/simple_service.py +2 -2
  197. django_cfg/modules/django_twilio/twilio_service.py +2 -2
  198. django_cfg/modules/django_unfold/__init__.py +69 -0
  199. django_cfg/modules/{unfold → django_unfold}/callbacks.py +23 -22
  200. django_cfg/modules/django_unfold/dashboard.py +278 -0
  201. django_cfg/modules/django_unfold/icons/README.md +145 -0
  202. django_cfg/modules/django_unfold/icons/__init__.py +12 -0
  203. django_cfg/modules/django_unfold/icons/constants.py +2851 -0
  204. django_cfg/modules/django_unfold/icons/generate_icons.py +486 -0
  205. django_cfg/modules/django_unfold/models/__init__.py +42 -0
  206. django_cfg/modules/django_unfold/models/config.py +601 -0
  207. django_cfg/modules/django_unfold/models/dashboard.py +206 -0
  208. django_cfg/modules/django_unfold/models/dropdown.py +40 -0
  209. django_cfg/modules/django_unfold/models/navigation.py +73 -0
  210. django_cfg/modules/django_unfold/models/tabs.py +25 -0
  211. django_cfg/modules/{unfold → django_unfold}/system_monitor.py +2 -2
  212. django_cfg/modules/django_unfold/utils.py +140 -0
  213. django_cfg/registry/__init__.py +23 -0
  214. django_cfg/registry/core.py +61 -0
  215. django_cfg/registry/exceptions.py +11 -0
  216. django_cfg/registry/modules.py +12 -0
  217. django_cfg/registry/services.py +26 -0
  218. django_cfg/registry/third_party.py +52 -0
  219. django_cfg/routing/__init__.py +19 -0
  220. django_cfg/routing/callbacks.py +198 -0
  221. django_cfg/routing/routers.py +48 -0
  222. django_cfg/templates/admin/layouts/dashboard_with_tabs.html +8 -9
  223. django_cfg/templatetags/__init__.py +0 -0
  224. django_cfg/templatetags/django_cfg.py +33 -0
  225. django_cfg/urls.py +33 -0
  226. django_cfg/utils/path_resolution.py +1 -1
  227. django_cfg/utils/smart_defaults.py +7 -61
  228. django_cfg/utils/toolkit.py +663 -0
  229. {django_cfg-1.1.82.dist-info → django_cfg-1.2.0.dist-info}/METADATA +83 -86
  230. django_cfg-1.2.0.dist-info/RECORD +441 -0
  231. django_cfg/archive/django_sample.zip +0 -0
  232. django_cfg/models/unfold.py +0 -271
  233. django_cfg/modules/unfold/__init__.py +0 -29
  234. django_cfg/modules/unfold/dashboard.py +0 -318
  235. django_cfg/pyproject.toml +0 -370
  236. django_cfg/routers.py +0 -83
  237. django_cfg-1.1.82.dist-info/RECORD +0 -278
  238. /django_cfg/{exceptions.py → core/exceptions.py} +0 -0
  239. /django_cfg/modules/{unfold → django_unfold}/models.py +0 -0
  240. /django_cfg/modules/{unfold → django_unfold}/tailwind.py +0 -0
  241. /django_cfg/{version_check.py → utils/version_check.py} +0 -0
  242. {django_cfg-1.1.82.dist-info → django_cfg-1.2.0.dist-info}/WHEEL +0 -0
  243. {django_cfg-1.1.82.dist-info → django_cfg-1.2.0.dist-info}/entry_points.txt +0 -0
  244. {django_cfg-1.1.82.dist-info → django_cfg-1.2.0.dist-info}/licenses/LICENSE +0 -0
@@ -0,0 +1,541 @@
1
+ """
2
+ Main document archive service.
3
+
4
+ Orchestrates the complete archive processing pipeline with synchronous processing.
5
+ """
6
+
7
+ import os
8
+ import time
9
+ import tempfile
10
+ import hashlib
11
+ import logging
12
+ from typing import List, Dict, Any, Optional
13
+ from django.core.files.uploadedfile import UploadedFile
14
+ from django.contrib.auth import get_user_model
15
+ from django.db import transaction
16
+ from django.utils import timezone
17
+ from pydantic import BaseModel, Field, ValidationError
18
+
19
+ from ...models.archive import DocumentArchive, ArchiveType, ContentType
20
+ from ...models.document import DocumentCategory
21
+ from ...models.base import ProcessingStatus
22
+ from ..base import BaseService
23
+ from .exceptions import (
24
+ ArchiveValidationError,
25
+ ArchiveProcessingError,
26
+ ProcessingTimeoutError
27
+ )
28
+ from .extraction_service import ArchiveExtractionService, ExtractedItemData
29
+ from .chunking_service import ContextualChunkingService
30
+ from .vectorization_service import ArchiveVectorizationService
31
+
32
+ User = get_user_model()
33
+ logger = logging.getLogger(__name__)
34
+
35
+
36
class ArchiveUploadRequest(BaseModel):
    """Validated form of the metadata submitted alongside an archive upload.

    The nested ``Config`` sets ``str_strip_whitespace`` so that pydantic is
    asked to trim surrounding whitespace from string values.
    """

    # Required title, between 1 and 512 characters long.
    title: str = Field(..., min_length=1, max_length=512)
    # Optional free-text description, at most 2000 characters.
    description: Optional[str] = Field(default=None, max_length=2000)
    # IDs of the categories to attach to the archive; empty by default.
    category_ids: List[str] = Field(default_factory=list)
    # Stored on the archive record as its ``is_public`` flag.
    is_public: bool = True
    # When true, the archive is processed synchronously right after creation.
    process_immediately: bool = True

    class Config:
        str_strip_whitespace = True
47
+
48
+
49
class ArchiveProcessingResult(BaseModel):
    """Result of archive processing operation."""

    # String form of the archive's ID.
    archive_id: str
    # The archive's processing status at the time the result was built.
    status: str
    # Elapsed processing time in milliseconds (0 when processing was not run).
    processing_time_ms: int
    # Number of item records created from the archive.
    items_processed: int
    # Number of chunks generated from the processable items.
    chunks_created: int
    # Number of chunks reported as vectorized by the vectorization step.
    vectorized_chunks: int
    # Total cost in USD reported by the vectorization step.
    total_cost_usd: float
    # Text of the exception when processing failed; None otherwise.
    error_message: Optional[str] = None
60
+
61
+
62
+ class DocumentArchiveService(BaseService):
63
+ """Main service for document archive operations."""
64
+
65
+ # Processing limits
66
+ MAX_ARCHIVE_SIZE = 200 * 1024 * 1024 # 200MB
67
+ MAX_ITEMS_COUNT = 2000
68
+ MAX_PROCESSING_TIME = 120 # 2 minutes
69
+
70
def __init__(self, user: User) -> None:
    """Bind the service to ``user`` and create the pipeline collaborators.

    Args:
        user: Forwarded to ``BaseService.__init__`` and to the chunking and
            vectorization services. The extraction service is created
            without arguments.
    """
    super().__init__(user)
    self.extraction_service = ArchiveExtractionService()
    self.chunking_service = ContextualChunkingService(user)
    self.vectorization_service = ArchiveVectorizationService(user)
75
+
76
def create_and_process_archive(
    self,
    uploaded_file: UploadedFile,
    request_data: Dict[str, Any]
) -> ArchiveProcessingResult:
    """Create an archive record and, if requested, process it synchronously.

    Args:
        uploaded_file: The uploaded archive file.
        request_data: Raw request fields; validated against
            ``ArchiveUploadRequest``.

    Returns:
        The result of synchronous processing when ``process_immediately`` is
        true; otherwise a zeroed result carrying the new archive's ID and
        its current processing status.

    Raises:
        ArchiveValidationError: If ``request_data`` fails validation. The
            original pydantic error is attached as ``__cause__``. Errors
            raised while creating the archive record propagate unchanged.
    """
    try:
        validated_request = ArchiveUploadRequest(**request_data)
    except ValidationError as e:
        # Chain the pydantic error so the root cause stays in the traceback.
        raise ArchiveValidationError(
            message="Invalid request data",
            code="INVALID_REQUEST",
            details={"validation_errors": e.errors()}
        ) from e

    archive = self._create_archive_record(uploaded_file, validated_request)

    if validated_request.process_immediately:
        return self._process_archive_sync(archive, uploaded_file)

    # Deferred processing: report the freshly created record with no work done.
    return ArchiveProcessingResult(
        archive_id=str(archive.id),
        status=archive.processing_status,
        processing_time_ms=0,
        items_processed=0,
        chunks_created=0,
        vectorized_chunks=0,
        total_cost_usd=0.0
    )
109
+
110
def process_archive(self, archive: DocumentArchive) -> bool:
    """Process an existing archive by its stored file.

    Runs extraction, item creation, chunking and vectorization against
    ``archive.archive_file.path``, checking the time budget between stages,
    and records the outcome on the archive.

    Args:
        archive: Archive whose stored file should be processed.

    Returns:
        True when processing completed. False when it timed out or failed;
        the archive is then saved with status FAILED, an error message and
        the elapsed duration.

    Raises:
        ArchiveProcessingError: If ``archive`` is falsy or has no file.
    """
    # Debug logging
    logger.info(
        "process_archive called with archive: %s, type: %s",
        archive,
        type(archive),
    )

    if not archive:
        raise ArchiveProcessingError(
            message="Archive object is None",
            code="ARCHIVE_IS_NONE"
        )

    if not archive.archive_file:
        raise ArchiveProcessingError(
            message="Archive has no file to process",
            code="NO_FILE"
        )

    start_time = time.time()

    def _record_failure(error_text: str) -> None:
        # Shared bookkeeping for both failure paths below.
        archive.processing_status = ProcessingStatus.FAILED
        archive.processing_error = error_text
        archive.processing_duration_ms = int((time.time() - start_time) * 1000)
        archive.save()

    try:
        archive.processing_status = ProcessingStatus.PROCESSING
        archive.save()

        # Get file path from the archive_file field
        file_path = archive.archive_file.path

        extracted_items = self.extraction_service.extract_archive(
            file_path,
            archive.archive_type
        )
        self._check_processing_timeout(start_time)

        items = self._create_item_records(archive, extracted_items)
        self._check_processing_timeout(start_time)

        chunks = self._generate_chunks_for_items(items)
        self._check_processing_timeout(start_time)

        vectorization_result = self._vectorize_chunks(chunks)

        self._update_archive_statistics(archive, items, chunks, vectorization_result)

        processing_time_ms = int((time.time() - start_time) * 1000)
        archive.processing_status = ProcessingStatus.COMPLETED
        archive.processed_at = timezone.now()
        archive.processing_duration_ms = processing_time_ms
        archive.save()

        logger.info(
            "Successfully processed archive %s in %sms",
            archive.id,
            processing_time_ms,
        )
        return True

    except ProcessingTimeoutError:
        _record_failure("Processing timeout exceeded")
        logger.error("Archive processing timeout for %s", archive.id)
        return False

    except Exception as e:
        _record_failure(str(e))
        # logger.exception keeps the traceback, which a plain error log loses.
        logger.exception("Archive processing failed for %s: %s", archive.id, e)
        return False
192
+
193
def _create_archive_record(
    self,
    uploaded_file: UploadedFile,
    request: ArchiveUploadRequest
) -> DocumentArchive:
    """Validate the upload and persist a PENDING archive record for it.

    Args:
        uploaded_file: The uploaded archive file.
        request: Already-validated upload metadata.

    Returns:
        The newly created ``DocumentArchive``.

    Raises:
        ArchiveValidationError: If file validation fails, or if this user
            already has an archive with the same content hash.
    """
    self._validate_uploaded_file(uploaded_file)

    digest = self._generate_file_hash(uploaded_file)

    # Reject a second upload of identical content by the same user.
    duplicate = DocumentArchive.objects.filter(
        user=self.user,
        content_hash=digest
    ).first()
    if duplicate:
        raise ArchiveValidationError(
            message=f"Archive already exists: {duplicate.title}",
            code="DUPLICATE_ARCHIVE",
            details={"existing_archive_id": str(duplicate.id)}
        )

    detected_type = self._detect_archive_type(uploaded_file.name)

    record_fields = dict(
        user=self.user,
        title=request.title,
        description=request.description,
        original_filename=uploaded_file.name,
        file_size=uploaded_file.size,
        archive_type=detected_type,
        content_hash=digest,
        is_public=request.is_public,
        processing_status=ProcessingStatus.PENDING,
    )

    # Record creation and category assignment commit or roll back together.
    with transaction.atomic():
        archive = DocumentArchive.objects.create(**record_fields)

        if request.category_ids:
            matching_categories = DocumentCategory.objects.filter(
                id__in=request.category_ids
            )
            archive.categories.set(matching_categories)

    return archive
244
+
245
def _process_archive_sync(
    self,
    archive: DocumentArchive,
    uploaded_file: UploadedFile
) -> ArchiveProcessingResult:
    """Process a freshly uploaded archive synchronously with time limits.

    The upload is written to a temporary file, then extracted, chunked and
    vectorized, with the time budget checked between stages. The temporary
    file is removed afterwards on a best-effort basis.

    Args:
        archive: The archive record to process and update.
        uploaded_file: The uploaded file holding the archive content.

    Returns:
        A result describing the outcome. Failures are not raised: the
        archive is saved with status FAILED and the result carries the
        error text in ``error_message``.
    """
    start_time = time.time()

    try:
        archive.processing_status = ProcessingStatus.PROCESSING
        archive.save()

        temp_file_path = self._save_temp_file(uploaded_file, archive.id)

        try:
            extracted_items = self.extraction_service.extract_archive(
                temp_file_path,
                archive.archive_type
            )
            self._check_processing_timeout(start_time)

            items = self._create_item_records(archive, extracted_items)
            self._check_processing_timeout(start_time)

            all_chunks = self._generate_chunks_for_items(items)
            self._check_processing_timeout(start_time)

            vectorization_result = self._vectorize_chunks(all_chunks)

            self._update_archive_statistics(
                archive,
                items,
                all_chunks,
                vectorization_result
            )

            processing_time_ms = int((time.time() - start_time) * 1000)
            archive.processing_status = ProcessingStatus.COMPLETED
            archive.processed_at = timezone.now()
            archive.processing_duration_ms = processing_time_ms
            archive.save()

            logger.info(
                "Successfully processed archive %s in %sms",
                archive.id,
                processing_time_ms,
            )

            return ArchiveProcessingResult(
                archive_id=str(archive.id),
                status=archive.processing_status,
                processing_time_ms=processing_time_ms,
                items_processed=len(items),
                chunks_created=len(all_chunks),
                vectorized_chunks=vectorization_result['vectorized_count'],
                total_cost_usd=vectorization_result['total_cost']
            )

        finally:
            # Cleanup is best-effort. An OSError raised here would otherwise
            # replace the return value above and reach the outer handler,
            # marking an already-COMPLETED archive as FAILED.
            try:
                self._cleanup_temp_file(temp_file_path)
            except OSError as cleanup_error:
                logger.warning(
                    "Could not clean up temporary file %s: %s",
                    temp_file_path,
                    cleanup_error,
                )

    except Exception as e:
        processing_time_ms = int((time.time() - start_time) * 1000)
        archive.processing_status = ProcessingStatus.FAILED
        archive.processing_error = str(e)
        archive.processing_duration_ms = processing_time_ms
        archive.save()

        # Log failures the same way process_archive does; the exception is
        # otherwise swallowed here and only surfaces in the returned result.
        if isinstance(e, ProcessingTimeoutError):
            logger.error("Archive processing timeout for %s", archive.id)
        else:
            logger.exception("Archive processing failed for %s: %s", archive.id, e)

        return ArchiveProcessingResult(
            archive_id=str(archive.id),
            status=archive.processing_status,
            processing_time_ms=processing_time_ms,
            items_processed=0,
            chunks_created=0,
            vectorized_chunks=0,
            total_cost_usd=0.0,
            error_message=str(e)
        )
334
+
335
def _validate_uploaded_file(self, uploaded_file: UploadedFile) -> None:
    """Reject uploads that are too large or of an unsupported format.

    Args:
        uploaded_file: The uploaded archive file.

    Raises:
        ArchiveValidationError: With code ``ARCHIVE_TOO_LARGE`` when the
            file exceeds ``MAX_ARCHIVE_SIZE``, or ``UNSUPPORTED_FORMAT``
            when no archive type can be detected from the filename.
    """
    size_limit = self.MAX_ARCHIVE_SIZE
    if uploaded_file.size > size_limit:
        raise ArchiveValidationError(
            message=f"Archive too large: {uploaded_file.size} bytes",
            code="ARCHIVE_TOO_LARGE",
            details={
                "file_size": uploaded_file.size,
                "max_size": size_limit
            }
        )

    # A recognised extension is all that is required beyond the size limit.
    if self._detect_archive_type(uploaded_file.name):
        return

    raise ArchiveValidationError(
        message=f"Unsupported archive format: {uploaded_file.name}",
        code="UNSUPPORTED_FORMAT",
        details={"filename": uploaded_file.name}
    )
357
+
358
+ def _detect_archive_type(self, filename: str) -> Optional[str]:
359
+ """Detect archive type from filename."""
360
+ filename_lower = filename.lower()
361
+
362
+ if filename_lower.endswith('.zip'):
363
+ return ArchiveType.ZIP
364
+ elif filename_lower.endswith(('.tar.gz', '.tgz')):
365
+ return ArchiveType.TAR_GZ
366
+ elif filename_lower.endswith(('.tar.bz2', '.tbz2')):
367
+ return ArchiveType.TAR_BZ2
368
+ elif filename_lower.endswith('.tar'):
369
+ return ArchiveType.TAR
370
+
371
+ return None
372
+
373
+ def _generate_file_hash(self, uploaded_file: UploadedFile) -> str:
374
+ """Generate SHA-256 hash of uploaded file."""
375
+ hash_sha256 = hashlib.sha256()
376
+
377
+ # Reset file pointer
378
+ uploaded_file.seek(0)
379
+
380
+ for chunk in uploaded_file.chunks():
381
+ hash_sha256.update(chunk)
382
+
383
+ # Reset file pointer again
384
+ uploaded_file.seek(0)
385
+
386
+ return hash_sha256.hexdigest()
387
+
388
+ def _save_temp_file(self, uploaded_file: UploadedFile, archive_id: str) -> str:
389
+ """Save uploaded file to temporary location."""
390
+ temp_dir = tempfile.mkdtemp(prefix=f'archive_{archive_id}_')
391
+ temp_path = os.path.join(temp_dir, uploaded_file.name)
392
+
393
+ with open(temp_path, 'wb') as f:
394
+ for chunk in uploaded_file.chunks():
395
+ f.write(chunk)
396
+
397
+ return temp_path
398
+
399
+ def _cleanup_temp_file(self, temp_file_path: str) -> None:
400
+ """Clean up temporary file and directory."""
401
+ if os.path.exists(temp_file_path):
402
+ os.unlink(temp_file_path)
403
+
404
+ # Remove directory if empty
405
+ temp_dir = os.path.dirname(temp_file_path)
406
+ try:
407
+ os.rmdir(temp_dir)
408
+ except OSError:
409
+ pass # Directory not empty or other error
410
+
411
+ def _check_processing_timeout(self, start_time: float) -> None:
412
+ """Check if processing has exceeded time limit."""
413
+ elapsed = time.time() - start_time
414
+ if elapsed > self.MAX_PROCESSING_TIME:
415
+ raise ProcessingTimeoutError(
416
+ message=f"Processing timeout after {elapsed:.1f} seconds",
417
+ code="PROCESSING_TIMEOUT",
418
+ details={
419
+ "elapsed_seconds": elapsed,
420
+ "max_seconds": self.MAX_PROCESSING_TIME
421
+ }
422
+ )
423
+
424
def _create_item_records(
    self,
    archive: DocumentArchive,
    extracted_items: List[ExtractedItemData]
) -> List:
    """Persist one ``ArchiveItem`` per extracted entry.

    Args:
        archive: Archive the items belong to; its ``total_items`` and
            ``processed_items`` counters are set and saved.
        extracted_items: Entries produced by the extraction service.

    Returns:
        The created ``ArchiveItem`` instances, in extraction order.

    Raises:
        ArchiveValidationError: With code ``TOO_MANY_ITEMS`` when the
            number of entries exceeds ``MAX_ITEMS_COUNT``.
    """
    from ...models.archive import ArchiveItem

    item_count = len(extracted_items)
    if item_count > self.MAX_ITEMS_COUNT:
        raise ArchiveValidationError(
            message=f"Too many items: {item_count}",
            code="TOO_MANY_ITEMS",
            details={
                "item_count": item_count,
                "max_count": self.MAX_ITEMS_COUNT
            }
        )

    # Note: Items should already be cleared by reprocess method

    with transaction.atomic():
        created = [
            ArchiveItem.objects.create(
                user=self.user,
                archive=archive,
                relative_path=extracted.relative_path,
                item_name=extracted.item_name,
                file_size=extracted.file_size,
                raw_content=extracted.content or '',
                is_processable=extracted.is_processable,
                metadata=extracted.metadata
            )
            for extracted in extracted_items
        ]

        # Both counters are set to the number of created items.
        created_total = len(created)
        archive.total_items = created_total
        archive.processed_items = created_total
        archive.save()

    return created
466
+
467
+ def _generate_chunks_for_items(self, items: List) -> List:
468
+ """Generate chunks for all processable items."""
469
+ all_chunks = []
470
+
471
+ for item in items:
472
+ if item.is_processable and item.raw_content:
473
+ chunks = self.chunking_service.create_chunks_with_context(item)
474
+ all_chunks.extend(chunks)
475
+
476
+ # Update item statistics
477
+ item.chunks_count = len(chunks)
478
+ item.save()
479
+
480
+ return all_chunks
481
+
482
+ def _vectorize_chunks(self, chunks: List) -> Dict[str, Any]:
483
+ """Vectorize all chunks."""
484
+ return self.vectorization_service.vectorize_chunks_batch(chunks)
485
+
486
+ def _update_archive_statistics(
487
+ self,
488
+ archive: DocumentArchive,
489
+ items: List,
490
+ chunks: List,
491
+ vectorization_result: Dict[str, Any]
492
+ ) -> None:
493
+ """Update archive with final statistics."""
494
+
495
+ total_tokens = sum(item.total_tokens for item in items)
496
+ total_cost = sum(item.processing_cost for item in items)
497
+
498
+ archive.total_chunks = len(chunks)
499
+ archive.vectorized_chunks = vectorization_result['vectorized_count']
500
+ archive.total_tokens = total_tokens
501
+ archive.total_cost_usd = total_cost
502
+ archive.save()
503
+
504
def get_archive_by_id(self, archive_id: str) -> Optional[DocumentArchive]:
    """Fetch one of the current user's archives by ID.

    Args:
        archive_id: ID of the archive to look up.

    Returns:
        The archive, or None when no archive with that ID belongs to the
        service's user.
    """
    owned_archives = DocumentArchive.objects.filter(user=self.user)
    try:
        return owned_archives.get(id=archive_id)
    except DocumentArchive.DoesNotExist:
        return None
511
+
512
def list_user_archives(
    self,
    limit: int = 20,
    offset: int = 0,
    status_filter: Optional[str] = None
) -> Dict[str, Any]:
    """List the current user's archives, newest first, with pagination.

    Negative ``limit`` or ``offset`` values are clamped to 0, because
    Django querysets do not support negative slice bounds.

    Args:
        limit: Maximum number of archives to return.
        offset: Number of archives to skip from the start.
        status_filter: When truthy, only archives with this
            ``processing_status`` are included.

    Returns:
        A dict with ``'archives'`` (the page as a list), ``'total_count'``
        (matching archives across all pages) and ``'has_more'`` (whether
        further archives exist beyond this page).
    """
    limit = max(0, limit)
    offset = max(0, offset)

    queryset = DocumentArchive.objects.filter(user=self.user)

    if status_filter:
        queryset = queryset.filter(processing_status=status_filter)

    total_count = queryset.count()
    page_end = offset + limit
    archives = list(queryset.order_by('-created_at')[offset:page_end])

    return {
        'archives': archives,
        'total_count': total_count,
        'has_more': page_end < total_count
    }
533
+
534
def delete_archive(self, archive_id: str) -> bool:
    """Delete one of the current user's archives.

    Args:
        archive_id: ID of the archive to delete.

    Returns:
        True when the archive was found and ``delete()`` was called on it;
        False when no archive with that ID belongs to the service's user.
    """
    owned_archives = DocumentArchive.objects.filter(user=self.user)
    try:
        target = owned_archives.get(id=archive_id)
    except DocumentArchive.DoesNotExist:
        return False

    target.delete()
    return True