django-cfg 1.1.81__py3-none-any.whl → 1.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- django_cfg/__init__.py +20 -448
- django_cfg/apps/accounts/README.md +3 -3
- django_cfg/apps/accounts/admin/__init__.py +0 -2
- django_cfg/apps/accounts/admin/activity.py +2 -9
- django_cfg/apps/accounts/admin/filters.py +0 -42
- django_cfg/apps/accounts/admin/inlines.py +8 -8
- django_cfg/apps/accounts/admin/otp.py +5 -5
- django_cfg/apps/accounts/admin/registration_source.py +1 -8
- django_cfg/apps/accounts/admin/user.py +12 -20
- django_cfg/apps/accounts/managers/user_manager.py +2 -129
- django_cfg/apps/accounts/migrations/0006_remove_twilioresponse_otp_secret_and_more.py +46 -0
- django_cfg/apps/accounts/models.py +3 -123
- django_cfg/apps/accounts/serializers/otp.py +40 -44
- django_cfg/apps/accounts/serializers/profile.py +0 -2
- django_cfg/apps/accounts/services/otp_service.py +98 -186
- django_cfg/apps/accounts/signals.py +25 -15
- django_cfg/apps/accounts/utils/auth_email_service.py +84 -0
- django_cfg/apps/accounts/views/otp.py +35 -36
- django_cfg/apps/agents/README.md +129 -0
- django_cfg/apps/agents/__init__.py +68 -0
- django_cfg/apps/agents/admin/__init__.py +17 -0
- django_cfg/apps/agents/admin/execution_admin.py +460 -0
- django_cfg/apps/agents/admin/registry_admin.py +360 -0
- django_cfg/apps/agents/admin/toolsets_admin.py +482 -0
- django_cfg/apps/agents/apps.py +29 -0
- django_cfg/apps/agents/core/__init__.py +20 -0
- django_cfg/apps/agents/core/agent.py +281 -0
- django_cfg/apps/agents/core/dependencies.py +154 -0
- django_cfg/apps/agents/core/exceptions.py +66 -0
- django_cfg/apps/agents/core/models.py +106 -0
- django_cfg/apps/agents/core/orchestrator.py +391 -0
- django_cfg/apps/agents/examples/__init__.py +3 -0
- django_cfg/apps/agents/examples/simple_example.py +161 -0
- django_cfg/apps/agents/integration/__init__.py +14 -0
- django_cfg/apps/agents/integration/middleware.py +80 -0
- django_cfg/apps/agents/integration/registry.py +345 -0
- django_cfg/apps/agents/integration/signals.py +50 -0
- django_cfg/apps/agents/management/__init__.py +3 -0
- django_cfg/apps/agents/management/commands/__init__.py +3 -0
- django_cfg/apps/agents/management/commands/create_agent.py +365 -0
- django_cfg/apps/agents/management/commands/orchestrator_status.py +191 -0
- django_cfg/apps/agents/managers/__init__.py +23 -0
- django_cfg/apps/agents/managers/execution.py +236 -0
- django_cfg/apps/agents/managers/registry.py +254 -0
- django_cfg/apps/agents/managers/toolsets.py +496 -0
- django_cfg/apps/agents/migrations/0001_initial.py +286 -0
- django_cfg/apps/agents/migrations/__init__.py +5 -0
- django_cfg/apps/agents/models/__init__.py +15 -0
- django_cfg/apps/agents/models/execution.py +215 -0
- django_cfg/apps/agents/models/registry.py +220 -0
- django_cfg/apps/agents/models/toolsets.py +305 -0
- django_cfg/apps/agents/patterns/__init__.py +24 -0
- django_cfg/apps/agents/patterns/content_agents.py +234 -0
- django_cfg/apps/agents/toolsets/__init__.py +15 -0
- django_cfg/apps/agents/toolsets/cache_toolset.py +285 -0
- django_cfg/apps/agents/toolsets/django_toolset.py +220 -0
- django_cfg/apps/agents/toolsets/file_toolset.py +324 -0
- django_cfg/apps/agents/toolsets/orm_toolset.py +319 -0
- django_cfg/apps/agents/urls.py +46 -0
- django_cfg/apps/knowbase/README.md +150 -0
- django_cfg/apps/knowbase/__init__.py +27 -0
- django_cfg/apps/knowbase/admin/__init__.py +23 -0
- django_cfg/apps/knowbase/admin/archive_admin.py +857 -0
- django_cfg/apps/knowbase/admin/chat_admin.py +386 -0
- django_cfg/apps/knowbase/admin/document_admin.py +650 -0
- django_cfg/apps/knowbase/admin/external_data_admin.py +685 -0
- django_cfg/apps/knowbase/apps.py +81 -0
- django_cfg/apps/knowbase/config/README.md +176 -0
- django_cfg/apps/knowbase/config/__init__.py +51 -0
- django_cfg/apps/knowbase/config/constance_fields.py +186 -0
- django_cfg/apps/knowbase/config/constance_settings.py +200 -0
- django_cfg/apps/knowbase/config/settings.py +444 -0
- django_cfg/apps/knowbase/examples/__init__.py +3 -0
- django_cfg/apps/knowbase/examples/external_data_usage.py +191 -0
- django_cfg/apps/knowbase/management/__init__.py +0 -0
- django_cfg/apps/knowbase/management/commands/__init__.py +0 -0
- django_cfg/apps/knowbase/management/commands/knowbase_stats.py +158 -0
- django_cfg/apps/knowbase/management/commands/setup_knowbase.py +59 -0
- django_cfg/apps/knowbase/managers/__init__.py +22 -0
- django_cfg/apps/knowbase/managers/archive.py +426 -0
- django_cfg/apps/knowbase/managers/base.py +32 -0
- django_cfg/apps/knowbase/managers/chat.py +141 -0
- django_cfg/apps/knowbase/managers/document.py +203 -0
- django_cfg/apps/knowbase/managers/external_data.py +471 -0
- django_cfg/apps/knowbase/migrations/0001_initial.py +427 -0
- django_cfg/apps/knowbase/migrations/0002_archiveitem_archiveitemchunk_documentarchive_and_more.py +434 -0
- django_cfg/apps/knowbase/migrations/__init__.py +5 -0
- django_cfg/apps/knowbase/mixins/__init__.py +15 -0
- django_cfg/apps/knowbase/mixins/config.py +108 -0
- django_cfg/apps/knowbase/mixins/creator.py +81 -0
- django_cfg/apps/knowbase/mixins/examples/vehicle_model_example.py +199 -0
- django_cfg/apps/knowbase/mixins/external_data_mixin.py +813 -0
- django_cfg/apps/knowbase/mixins/service.py +362 -0
- django_cfg/apps/knowbase/models/__init__.py +41 -0
- django_cfg/apps/knowbase/models/archive.py +599 -0
- django_cfg/apps/knowbase/models/base.py +58 -0
- django_cfg/apps/knowbase/models/chat.py +157 -0
- django_cfg/apps/knowbase/models/document.py +267 -0
- django_cfg/apps/knowbase/models/external_data.py +376 -0
- django_cfg/apps/knowbase/serializers/__init__.py +68 -0
- django_cfg/apps/knowbase/serializers/archive_serializers.py +386 -0
- django_cfg/apps/knowbase/serializers/chat_serializers.py +137 -0
- django_cfg/apps/knowbase/serializers/document_serializers.py +94 -0
- django_cfg/apps/knowbase/serializers/external_data_serializers.py +256 -0
- django_cfg/apps/knowbase/serializers/public_serializers.py +74 -0
- django_cfg/apps/knowbase/services/__init__.py +40 -0
- django_cfg/apps/knowbase/services/archive/__init__.py +42 -0
- django_cfg/apps/knowbase/services/archive/archive_service.py +541 -0
- django_cfg/apps/knowbase/services/archive/chunking_service.py +791 -0
- django_cfg/apps/knowbase/services/archive/exceptions.py +52 -0
- django_cfg/apps/knowbase/services/archive/extraction_service.py +508 -0
- django_cfg/apps/knowbase/services/archive/vectorization_service.py +362 -0
- django_cfg/apps/knowbase/services/base.py +53 -0
- django_cfg/apps/knowbase/services/chat_service.py +239 -0
- django_cfg/apps/knowbase/services/document_service.py +144 -0
- django_cfg/apps/knowbase/services/embedding/__init__.py +43 -0
- django_cfg/apps/knowbase/services/embedding/async_processor.py +244 -0
- django_cfg/apps/knowbase/services/embedding/batch_processor.py +250 -0
- django_cfg/apps/knowbase/services/embedding/batch_result.py +61 -0
- django_cfg/apps/knowbase/services/embedding/models.py +229 -0
- django_cfg/apps/knowbase/services/embedding/processors.py +148 -0
- django_cfg/apps/knowbase/services/embedding/utils.py +176 -0
- django_cfg/apps/knowbase/services/prompt_builder.py +191 -0
- django_cfg/apps/knowbase/services/search_service.py +293 -0
- django_cfg/apps/knowbase/signals/__init__.py +21 -0
- django_cfg/apps/knowbase/signals/archive_signals.py +211 -0
- django_cfg/apps/knowbase/signals/chat_signals.py +37 -0
- django_cfg/apps/knowbase/signals/document_signals.py +143 -0
- django_cfg/apps/knowbase/signals/external_data_signals.py +157 -0
- django_cfg/apps/knowbase/tasks/__init__.py +39 -0
- django_cfg/apps/knowbase/tasks/archive_tasks.py +316 -0
- django_cfg/apps/knowbase/tasks/document_processing.py +341 -0
- django_cfg/apps/knowbase/tasks/external_data_tasks.py +341 -0
- django_cfg/apps/knowbase/tasks/maintenance.py +195 -0
- django_cfg/apps/knowbase/urls.py +43 -0
- django_cfg/apps/knowbase/utils/__init__.py +12 -0
- django_cfg/apps/knowbase/utils/chunk_settings.py +261 -0
- django_cfg/apps/knowbase/utils/text_processing.py +375 -0
- django_cfg/apps/knowbase/utils/validation.py +99 -0
- django_cfg/apps/knowbase/views/__init__.py +28 -0
- django_cfg/apps/knowbase/views/archive_views.py +469 -0
- django_cfg/apps/knowbase/views/base.py +49 -0
- django_cfg/apps/knowbase/views/chat_views.py +181 -0
- django_cfg/apps/knowbase/views/document_views.py +183 -0
- django_cfg/apps/knowbase/views/public_views.py +129 -0
- django_cfg/apps/leads/admin.py +70 -0
- django_cfg/apps/newsletter/admin.py +234 -0
- django_cfg/apps/newsletter/admin_filters.py +124 -0
- django_cfg/apps/support/admin.py +196 -0
- django_cfg/apps/support/admin_filters.py +71 -0
- django_cfg/apps/support/templates/support/chat/ticket_chat.html +1 -1
- django_cfg/apps/urls.py +5 -4
- django_cfg/cli/README.md +1 -1
- django_cfg/cli/commands/create_project.py +2 -2
- django_cfg/cli/commands/info.py +1 -1
- django_cfg/config.py +44 -0
- django_cfg/core/config.py +29 -82
- django_cfg/core/environment.py +1 -1
- django_cfg/core/generation.py +19 -107
- django_cfg/{integration.py → core/integration.py} +18 -16
- django_cfg/core/validation.py +1 -1
- django_cfg/management/__init__.py +1 -1
- django_cfg/management/commands/__init__.py +1 -1
- django_cfg/management/commands/auto_generate.py +482 -0
- django_cfg/management/commands/migrator.py +19 -101
- django_cfg/management/commands/test_email.py +1 -1
- django_cfg/middleware/README.md +0 -158
- django_cfg/middleware/__init__.py +0 -2
- django_cfg/middleware/user_activity.py +3 -3
- django_cfg/models/api.py +145 -0
- django_cfg/models/base.py +287 -0
- django_cfg/models/cache.py +4 -4
- django_cfg/models/constance.py +25 -88
- django_cfg/models/database.py +9 -9
- django_cfg/models/drf.py +3 -36
- django_cfg/models/email.py +163 -0
- django_cfg/models/environment.py +276 -0
- django_cfg/models/limits.py +1 -1
- django_cfg/models/logging.py +366 -0
- django_cfg/models/revolution.py +41 -2
- django_cfg/models/security.py +125 -0
- django_cfg/models/services.py +1 -1
- django_cfg/modules/__init__.py +2 -56
- django_cfg/modules/base.py +78 -52
- django_cfg/modules/django_currency/service.py +2 -2
- django_cfg/modules/django_email.py +2 -2
- django_cfg/modules/django_health.py +267 -0
- django_cfg/modules/django_llm/llm/client.py +79 -17
- django_cfg/modules/django_llm/translator/translator.py +2 -2
- django_cfg/modules/django_logger.py +2 -2
- django_cfg/modules/django_ngrok.py +2 -2
- django_cfg/modules/django_tasks.py +68 -3
- django_cfg/modules/django_telegram.py +3 -3
- django_cfg/modules/django_twilio/sendgrid_service.py +2 -2
- django_cfg/modules/django_twilio/service.py +2 -2
- django_cfg/modules/django_twilio/simple_service.py +2 -2
- django_cfg/modules/django_twilio/templates/guide.md +266 -0
- django_cfg/modules/django_twilio/twilio_service.py +2 -2
- django_cfg/modules/django_unfold/__init__.py +69 -0
- django_cfg/modules/{unfold → django_unfold}/callbacks.py +23 -22
- django_cfg/modules/django_unfold/dashboard.py +278 -0
- django_cfg/modules/django_unfold/icons/README.md +145 -0
- django_cfg/modules/django_unfold/icons/__init__.py +12 -0
- django_cfg/modules/django_unfold/icons/constants.py +2851 -0
- django_cfg/modules/django_unfold/icons/generate_icons.py +486 -0
- django_cfg/modules/django_unfold/models/__init__.py +42 -0
- django_cfg/modules/django_unfold/models/config.py +601 -0
- django_cfg/modules/django_unfold/models/dashboard.py +206 -0
- django_cfg/modules/django_unfold/models/dropdown.py +40 -0
- django_cfg/modules/django_unfold/models/navigation.py +73 -0
- django_cfg/modules/django_unfold/models/tabs.py +25 -0
- django_cfg/modules/{unfold → django_unfold}/system_monitor.py +2 -2
- django_cfg/modules/django_unfold/utils.py +140 -0
- django_cfg/registry/__init__.py +23 -0
- django_cfg/registry/core.py +61 -0
- django_cfg/registry/exceptions.py +11 -0
- django_cfg/registry/modules.py +12 -0
- django_cfg/registry/services.py +26 -0
- django_cfg/registry/third_party.py +52 -0
- django_cfg/routing/__init__.py +19 -0
- django_cfg/routing/callbacks.py +198 -0
- django_cfg/routing/routers.py +48 -0
- django_cfg/templates/admin/layouts/dashboard_with_tabs.html +8 -9
- django_cfg/templatetags/__init__.py +0 -0
- django_cfg/templatetags/django_cfg.py +33 -0
- django_cfg/urls.py +33 -0
- django_cfg/utils/path_resolution.py +1 -1
- django_cfg/utils/smart_defaults.py +7 -61
- django_cfg/utils/toolkit.py +663 -0
- {django_cfg-1.1.81.dist-info → django_cfg-1.2.0.dist-info}/METADATA +83 -86
- django_cfg-1.2.0.dist-info/RECORD +441 -0
- django_cfg/apps/tasks/@docs/README.md +0 -195
- django_cfg/archive/django_sample.zip +0 -0
- django_cfg/models/unfold.py +0 -271
- django_cfg/modules/unfold/__init__.py +0 -29
- django_cfg/modules/unfold/dashboard.py +0 -318
- django_cfg/pyproject.toml +0 -370
- django_cfg/routers.py +0 -83
- django_cfg-1.1.81.dist-info/RECORD +0 -278
- /django_cfg/{exceptions.py → core/exceptions.py} +0 -0
- /django_cfg/modules/{unfold → django_unfold}/models.py +0 -0
- /django_cfg/modules/{unfold → django_unfold}/tailwind.py +0 -0
- /django_cfg/{version_check.py → utils/version_check.py} +0 -0
- {django_cfg-1.1.81.dist-info → django_cfg-1.2.0.dist-info}/WHEEL +0 -0
- {django_cfg-1.1.81.dist-info → django_cfg-1.2.0.dist-info}/entry_points.txt +0 -0
- {django_cfg-1.1.81.dist-info → django_cfg-1.2.0.dist-info}/licenses/LICENSE +0 -0
@@ -0,0 +1,541 @@
|
|
1
|
+
"""
|
2
|
+
Main document archive service.
|
3
|
+
|
4
|
+
Orchestrates the complete archive processing pipeline with synchronous processing.
|
5
|
+
"""
|
6
|
+
|
7
|
+
import os
|
8
|
+
import time
|
9
|
+
import tempfile
|
10
|
+
import hashlib
|
11
|
+
import logging
|
12
|
+
from typing import List, Dict, Any, Optional
|
13
|
+
from django.core.files.uploadedfile import UploadedFile
|
14
|
+
from django.contrib.auth import get_user_model
|
15
|
+
from django.db import transaction
|
16
|
+
from django.utils import timezone
|
17
|
+
from pydantic import BaseModel, Field, ValidationError
|
18
|
+
|
19
|
+
from ...models.archive import DocumentArchive, ArchiveType, ContentType
|
20
|
+
from ...models.document import DocumentCategory
|
21
|
+
from ...models.base import ProcessingStatus
|
22
|
+
from ..base import BaseService
|
23
|
+
from .exceptions import (
|
24
|
+
ArchiveValidationError,
|
25
|
+
ArchiveProcessingError,
|
26
|
+
ProcessingTimeoutError
|
27
|
+
)
|
28
|
+
from .extraction_service import ArchiveExtractionService, ExtractedItemData
|
29
|
+
from .chunking_service import ContextualChunkingService
|
30
|
+
from .vectorization_service import ArchiveVectorizationService
|
31
|
+
|
32
|
+
User = get_user_model()
|
33
|
+
logger = logging.getLogger(__name__)
|
34
|
+
|
35
|
+
|
36
|
+
class ArchiveUploadRequest(BaseModel):
    """Request fields accepted alongside an uploaded archive.

    ``DocumentArchiveService.create_and_process_archive`` builds this model
    from the raw request dictionary to validate it.
    """

    # Required title, 1 to 512 characters.
    title: str = Field(..., min_length=1, max_length=512)
    # Optional description, at most 2000 characters.
    description: Optional[str] = Field(default=None, max_length=2000)
    # Category identifiers to attach; a fresh empty list when omitted.
    category_ids: List[str] = Field(default_factory=list)
    # Visibility flag stored on the archive record.
    is_public: bool = True
    # When true the archive is processed synchronously right after creation.
    process_immediately: bool = True

    class Config:
        # Pydantic option: strip surrounding whitespace from string values.
        str_strip_whitespace = True
|
47
|
+
|
48
|
+
|
49
|
+
class ArchiveProcessingResult(BaseModel):
    """Summary returned to callers after an archive processing attempt."""

    # String form of the archive's id.
    archive_id: str
    # Processing status of the archive at the time the result was built.
    status: str
    # Elapsed processing time in milliseconds (0 when nothing was processed).
    processing_time_ms: int
    # Number of item records created from the archive.
    items_processed: int
    # Number of chunks generated from those items.
    chunks_created: int
    # Number of chunks reported as vectorized.
    vectorized_chunks: int
    # Cost reported for the run, in USD.
    total_cost_usd: float
    # Error text when processing failed; None otherwise.
    error_message: Optional[str] = Field(default=None)
|
60
|
+
|
61
|
+
|
62
|
+
class DocumentArchiveService(BaseService):
    """Service that creates, processes, lists and deletes document archives."""

    # Processing limits
    MAX_ARCHIVE_SIZE = 200 * 1024 ** 2  # bytes (200MB)
    MAX_ITEMS_COUNT = 2000  # maximum extracted items per archive
    MAX_PROCESSING_TIME = 2 * 60  # seconds (2 minutes)

    def __init__(self, user: User):
        """Bind the service to ``user`` and create the pipeline services.

        Args:
            user: Owner passed to the base service and to the chunking and
                vectorization services.
        """
        super().__init__(user)
        self.extraction_service = ArchiveExtractionService()
        self.chunking_service = ContextualChunkingService(user)
        self.vectorization_service = ArchiveVectorizationService(user)
|
75
|
+
|
76
|
+
def create_and_process_archive(
    self,
    uploaded_file: UploadedFile,
    request_data: Dict[str, Any]
) -> ArchiveProcessingResult:
    """Validate the request, create the archive record and optionally process it.

    Args:
        uploaded_file: The uploaded archive file.
        request_data: Raw request fields, validated with
            ``ArchiveUploadRequest``.

    Returns:
        The outcome of synchronous processing when ``process_immediately``
        is true; otherwise a zeroed result carrying the new archive's id
        and its current processing status.

    Raises:
        ArchiveValidationError: With code ``INVALID_REQUEST`` when
            ``request_data`` fails validation (the pydantic error is kept as
            ``__cause__``), or whatever ``_create_archive_record`` raises
            for an invalid or duplicate upload.
    """
    try:
        validated_request = ArchiveUploadRequest(**request_data)
    except ValidationError as e:
        # Chain explicitly so the original validation failure stays attached
        # as the cause of the domain error.
        raise ArchiveValidationError(
            message="Invalid request data",
            code="INVALID_REQUEST",
            details={"validation_errors": e.errors()}
        ) from e

    archive = self._create_archive_record(uploaded_file, validated_request)

    if validated_request.process_immediately:
        return self._process_archive_sync(archive, uploaded_file)

    # Deferred processing: report the record as created, with empty counters.
    # NOTE(review): _create_archive_record does not appear to store
    # uploaded_file on the record, while process_archive requires
    # archive.archive_file. Confirm the caller attaches the file.
    return ArchiveProcessingResult(
        archive_id=str(archive.id),
        status=archive.processing_status,
        processing_time_ms=0,
        items_processed=0,
        chunks_created=0,
        vectorized_chunks=0,
        total_cost_usd=0.0
    )
|
109
|
+
|
110
|
+
def process_archive(self, archive: DocumentArchive) -> bool:
    """Process an existing archive from its stored file.

    Runs extraction, item creation, chunking and vectorization in order,
    checking the time budget between stages, and records the outcome on
    ``archive``.

    Args:
        archive: Archive record whose ``archive_file`` is processed.

    Returns:
        True when processing completed. False when it timed out or failed;
        in that case the archive is saved as FAILED with the error text and
        the failure is logged.

    Raises:
        ArchiveProcessingError: With code ``ARCHIVE_IS_NONE`` when
            ``archive`` is None, or ``NO_FILE`` when it has no file.
    """
    # Diagnostic output only, so it is emitted at DEBUG with lazy formatting.
    logger.debug(
        "process_archive called with archive: %s, type: %s",
        archive,
        type(archive),
    )

    if archive is None:
        raise ArchiveProcessingError(
            message="Archive object is None",
            code="ARCHIVE_IS_NONE"
        )

    if not archive.archive_file:
        raise ArchiveProcessingError(
            message="Archive has no file to process",
            code="NO_FILE"
        )

    start_time = time.time()

    def record_failure(error_text: str) -> None:
        """Save FAILED status, the error text and the elapsed time."""
        archive.processing_status = ProcessingStatus.FAILED
        archive.processing_error = error_text
        archive.processing_duration_ms = int((time.time() - start_time) * 1000)
        archive.save()

    try:
        # Update status
        archive.processing_status = ProcessingStatus.PROCESSING
        archive.save()

        # Get file path from the archive_file field
        file_path = archive.archive_file.path

        # Extract archive
        extracted_items = self.extraction_service.extract_archive(
            file_path,
            archive.archive_type
        )
        self._check_processing_timeout(start_time)

        # Create item records
        items = self._create_item_records(archive, extracted_items)
        self._check_processing_timeout(start_time)

        # Generate chunks
        chunks = self._generate_chunks_for_items(items)
        self._check_processing_timeout(start_time)

        # Vectorize chunks, then store the aggregate statistics
        vectorization_result = self._vectorize_chunks(chunks)
        self._update_archive_statistics(archive, items, chunks, vectorization_result)

        # Mark as completed
        processing_time_ms = int((time.time() - start_time) * 1000)
        archive.processing_status = ProcessingStatus.COMPLETED
        archive.processed_at = timezone.now()
        archive.processing_duration_ms = processing_time_ms
        archive.save()

        logger.info(f"Successfully processed archive {archive.id} in {processing_time_ms}ms")
        return True

    except ProcessingTimeoutError:
        record_failure("Processing timeout exceeded")
        logger.error(f"Archive processing timeout for {archive.id}")
        return False

    except Exception as e:
        record_failure(str(e))
        # logger.exception keeps the traceback, which the stored error text lacks.
        logger.exception(f"Archive processing failed for {archive.id}: {e}")
        return False
|
192
|
+
|
193
|
+
def _create_archive_record(
    self,
    uploaded_file: UploadedFile,
    request: ArchiveUploadRequest
) -> DocumentArchive:
    """Validate the upload and create a PENDING ``DocumentArchive`` record.

    Args:
        uploaded_file: The uploaded archive file.
        request: Already validated upload request.

    Returns:
        The newly created archive, with the requested categories attached.

    Raises:
        ArchiveValidationError: From file validation, or with code
            ``DUPLICATE_ARCHIVE`` when this user already has an archive with
            the same content hash.
    """
    self._validate_uploaded_file(uploaded_file)

    # The content hash is the per-user duplicate key.
    digest = self._generate_file_hash(uploaded_file)

    same_content = DocumentArchive.objects.filter(
        user=self.user,
        content_hash=digest
    )
    duplicate = same_content.first()
    if duplicate:
        raise ArchiveValidationError(
            message=f"Archive already exists: {duplicate.title}",
            code="DUPLICATE_ARCHIVE",
            details={"existing_archive_id": str(duplicate.id)}
        )

    detected_type = self._detect_archive_type(uploaded_file.name)

    # NOTE(review): the uploaded file itself is not passed to the record
    # here; confirm where archive_file gets populated.
    record_fields = dict(
        user=self.user,
        title=request.title,
        description=request.description,
        original_filename=uploaded_file.name,
        file_size=uploaded_file.size,
        archive_type=detected_type,
        content_hash=digest,
        is_public=request.is_public,
        processing_status=ProcessingStatus.PENDING,
    )

    # Record creation and category assignment share one transaction.
    with transaction.atomic():
        archive = DocumentArchive.objects.create(**record_fields)

        if request.category_ids:
            archive.categories.set(
                DocumentCategory.objects.filter(id__in=request.category_ids)
            )

    return archive
|
244
|
+
|
245
|
+
def _process_archive_sync(
    self,
    archive: DocumentArchive,
    uploaded_file: UploadedFile
) -> ArchiveProcessingResult:
    """Process a freshly uploaded archive synchronously within the time limit.

    The upload is written to a temporary file, extracted, turned into item
    records, chunked and vectorized. The temporary file is removed whether
    or not processing succeeds.

    Args:
        archive: The record created for this upload.
        uploaded_file: The uploaded archive content.

    Returns:
        A result describing the run. Failures of any kind (timeouts
        included) are not raised: the archive is saved as FAILED, the error
        is logged, and the result carries ``error_message``.
    """
    start_time = time.time()

    try:
        # Update status
        archive.processing_status = ProcessingStatus.PROCESSING
        archive.save()

        # Save file temporarily
        temp_file_path = self._save_temp_file(uploaded_file, archive.id)

        try:
            # Extract archive
            extracted_items = self.extraction_service.extract_archive(
                temp_file_path,
                archive.archive_type
            )
            self._check_processing_timeout(start_time)

            # Create item records
            items = self._create_item_records(archive, extracted_items)
            self._check_processing_timeout(start_time)

            # Generate chunks
            all_chunks = self._generate_chunks_for_items(items)
            self._check_processing_timeout(start_time)

            # Vectorize chunks
            vectorization_result = self._vectorize_chunks(all_chunks)

            # Update archive statistics
            self._update_archive_statistics(
                archive,
                items,
                all_chunks,
                vectorization_result
            )

            # Mark as completed
            processing_time_ms = int((time.time() - start_time) * 1000)
            archive.processing_status = ProcessingStatus.COMPLETED
            archive.processed_at = timezone.now()
            archive.processing_duration_ms = processing_time_ms
            archive.save()

            logger.info(f"Successfully processed archive {archive.id} in {processing_time_ms}ms")

            return ArchiveProcessingResult(
                archive_id=str(archive.id),
                status=archive.processing_status,
                processing_time_ms=processing_time_ms,
                items_processed=len(items),
                chunks_created=len(all_chunks),
                vectorized_chunks=vectorization_result['vectorized_count'],
                total_cost_usd=vectorization_result['total_cost']
            )

        finally:
            # Always cleanup temp file
            self._cleanup_temp_file(temp_file_path)

    except Exception as e:
        # Mark as failed
        processing_time_ms = int((time.time() - start_time) * 1000)
        archive.processing_status = ProcessingStatus.FAILED
        archive.processing_error = str(e)
        archive.processing_duration_ms = processing_time_ms
        archive.save()

        # The exception is converted into a result, so log it with its
        # traceback here; otherwise the failure leaves no trace in the logs.
        logger.exception(f"Archive processing failed for {archive.id}: {e}")

        return ArchiveProcessingResult(
            archive_id=str(archive.id),
            status=archive.processing_status,
            processing_time_ms=processing_time_ms,
            items_processed=0,
            chunks_created=0,
            vectorized_chunks=0,
            total_cost_usd=0.0,
            error_message=str(e)
        )
|
334
|
+
|
335
|
+
def _validate_uploaded_file(self, uploaded_file: UploadedFile) -> None:
|
336
|
+
"""Validate uploaded archive file."""
|
337
|
+
|
338
|
+
# Size check
|
339
|
+
if uploaded_file.size > self.MAX_ARCHIVE_SIZE:
|
340
|
+
raise ArchiveValidationError(
|
341
|
+
message=f"Archive too large: {uploaded_file.size} bytes",
|
342
|
+
code="ARCHIVE_TOO_LARGE",
|
343
|
+
details={
|
344
|
+
"file_size": uploaded_file.size,
|
345
|
+
"max_size": self.MAX_ARCHIVE_SIZE
|
346
|
+
}
|
347
|
+
)
|
348
|
+
|
349
|
+
# Type check
|
350
|
+
archive_type = self._detect_archive_type(uploaded_file.name)
|
351
|
+
if not archive_type:
|
352
|
+
raise ArchiveValidationError(
|
353
|
+
message=f"Unsupported archive format: {uploaded_file.name}",
|
354
|
+
code="UNSUPPORTED_FORMAT",
|
355
|
+
details={"filename": uploaded_file.name}
|
356
|
+
)
|
357
|
+
|
358
|
+
def _detect_archive_type(self, filename: str) -> Optional[str]:
|
359
|
+
"""Detect archive type from filename."""
|
360
|
+
filename_lower = filename.lower()
|
361
|
+
|
362
|
+
if filename_lower.endswith('.zip'):
|
363
|
+
return ArchiveType.ZIP
|
364
|
+
elif filename_lower.endswith(('.tar.gz', '.tgz')):
|
365
|
+
return ArchiveType.TAR_GZ
|
366
|
+
elif filename_lower.endswith(('.tar.bz2', '.tbz2')):
|
367
|
+
return ArchiveType.TAR_BZ2
|
368
|
+
elif filename_lower.endswith('.tar'):
|
369
|
+
return ArchiveType.TAR
|
370
|
+
|
371
|
+
return None
|
372
|
+
|
373
|
+
def _generate_file_hash(self, uploaded_file: UploadedFile) -> str:
|
374
|
+
"""Generate SHA-256 hash of uploaded file."""
|
375
|
+
hash_sha256 = hashlib.sha256()
|
376
|
+
|
377
|
+
# Reset file pointer
|
378
|
+
uploaded_file.seek(0)
|
379
|
+
|
380
|
+
for chunk in uploaded_file.chunks():
|
381
|
+
hash_sha256.update(chunk)
|
382
|
+
|
383
|
+
# Reset file pointer again
|
384
|
+
uploaded_file.seek(0)
|
385
|
+
|
386
|
+
return hash_sha256.hexdigest()
|
387
|
+
|
388
|
+
def _save_temp_file(self, uploaded_file: UploadedFile, archive_id: str) -> str:
|
389
|
+
"""Save uploaded file to temporary location."""
|
390
|
+
temp_dir = tempfile.mkdtemp(prefix=f'archive_{archive_id}_')
|
391
|
+
temp_path = os.path.join(temp_dir, uploaded_file.name)
|
392
|
+
|
393
|
+
with open(temp_path, 'wb') as f:
|
394
|
+
for chunk in uploaded_file.chunks():
|
395
|
+
f.write(chunk)
|
396
|
+
|
397
|
+
return temp_path
|
398
|
+
|
399
|
+
def _cleanup_temp_file(self, temp_file_path: str) -> None:
|
400
|
+
"""Clean up temporary file and directory."""
|
401
|
+
if os.path.exists(temp_file_path):
|
402
|
+
os.unlink(temp_file_path)
|
403
|
+
|
404
|
+
# Remove directory if empty
|
405
|
+
temp_dir = os.path.dirname(temp_file_path)
|
406
|
+
try:
|
407
|
+
os.rmdir(temp_dir)
|
408
|
+
except OSError:
|
409
|
+
pass # Directory not empty or other error
|
410
|
+
|
411
|
+
def _check_processing_timeout(self, start_time: float) -> None:
|
412
|
+
"""Check if processing has exceeded time limit."""
|
413
|
+
elapsed = time.time() - start_time
|
414
|
+
if elapsed > self.MAX_PROCESSING_TIME:
|
415
|
+
raise ProcessingTimeoutError(
|
416
|
+
message=f"Processing timeout after {elapsed:.1f} seconds",
|
417
|
+
code="PROCESSING_TIMEOUT",
|
418
|
+
details={
|
419
|
+
"elapsed_seconds": elapsed,
|
420
|
+
"max_seconds": self.MAX_PROCESSING_TIME
|
421
|
+
}
|
422
|
+
)
|
423
|
+
|
424
|
+
def _create_item_records(
    self,
    archive: DocumentArchive,
    extracted_items: List[ExtractedItemData]
) -> List:
    """Create one ``ArchiveItem`` row per extracted item.

    Args:
        archive: The archive the items belong to; its ``total_items`` and
            ``processed_items`` counters are updated and saved.
        extracted_items: Items produced by the extraction service.

    Returns:
        The created ``ArchiveItem`` instances, in extraction order.

    Raises:
        ArchiveValidationError: With code ``TOO_MANY_ITEMS`` when more than
            ``MAX_ITEMS_COUNT`` items were extracted.
    """
    from ...models.archive import ArchiveItem

    item_count = len(extracted_items)
    if item_count > self.MAX_ITEMS_COUNT:
        raise ArchiveValidationError(
            message=f"Too many items: {item_count}",
            code="TOO_MANY_ITEMS",
            details={
                "item_count": item_count,
                "max_count": self.MAX_ITEMS_COUNT
            }
        )

    # Note: Items should already be cleared by reprocess method

    # All rows are created in a single transaction.
    with transaction.atomic():
        items = [
            ArchiveItem.objects.create(
                user=self.user,
                archive=archive,
                relative_path=item_data.relative_path,
                item_name=item_data.item_name,
                file_size=item_data.file_size,
                raw_content=item_data.content or '',
                is_processable=item_data.is_processable,
                metadata=item_data.metadata
            )
            for item_data in extracted_items
        ]

    # Update archive statistics
    created_total = len(items)
    archive.total_items = created_total
    archive.processed_items = created_total
    archive.save()

    return items
|
466
|
+
|
467
|
+
def _generate_chunks_for_items(self, items: List) -> List:
|
468
|
+
"""Generate chunks for all processable items."""
|
469
|
+
all_chunks = []
|
470
|
+
|
471
|
+
for item in items:
|
472
|
+
if item.is_processable and item.raw_content:
|
473
|
+
chunks = self.chunking_service.create_chunks_with_context(item)
|
474
|
+
all_chunks.extend(chunks)
|
475
|
+
|
476
|
+
# Update item statistics
|
477
|
+
item.chunks_count = len(chunks)
|
478
|
+
item.save()
|
479
|
+
|
480
|
+
return all_chunks
|
481
|
+
|
482
|
+
def _vectorize_chunks(self, chunks: List) -> Dict[str, Any]:
|
483
|
+
"""Vectorize all chunks."""
|
484
|
+
return self.vectorization_service.vectorize_chunks_batch(chunks)
|
485
|
+
|
486
|
+
def _update_archive_statistics(
|
487
|
+
self,
|
488
|
+
archive: DocumentArchive,
|
489
|
+
items: List,
|
490
|
+
chunks: List,
|
491
|
+
vectorization_result: Dict[str, Any]
|
492
|
+
) -> None:
|
493
|
+
"""Update archive with final statistics."""
|
494
|
+
|
495
|
+
total_tokens = sum(item.total_tokens for item in items)
|
496
|
+
total_cost = sum(item.processing_cost for item in items)
|
497
|
+
|
498
|
+
archive.total_chunks = len(chunks)
|
499
|
+
archive.vectorized_chunks = vectorization_result['vectorized_count']
|
500
|
+
archive.total_tokens = total_tokens
|
501
|
+
archive.total_cost_usd = total_cost
|
502
|
+
archive.save()
|
503
|
+
|
504
|
+
def get_archive_by_id(self, archive_id: str) -> Optional[DocumentArchive]:
    """Return the current user's archive with this id, or None.

    The lookup is restricted to archives owned by ``self.user``, so another
    user's archive is reported as not found.
    """
    lookup = {"id": archive_id, "user": self.user}
    try:
        found = DocumentArchive.objects.get(**lookup)
    except DocumentArchive.DoesNotExist:
        return None
    return found
|
511
|
+
|
512
|
+
def list_user_archives(
    self,
    limit: int = 20,
    offset: int = 0,
    status_filter: Optional[str] = None
) -> Dict[str, Any]:
    """Return one page of the current user's archives, newest first.

    Args:
        limit: Maximum number of archives in the page.
        offset: Number of archives to skip.
        status_filter: When given, only archives with this processing
            status are included.

    Returns:
        A dict with ``archives`` (the page as a list), ``total_count``
        (matches before paging) and ``has_more`` (whether archives remain
        beyond this page).
    """
    queryset = DocumentArchive.objects.filter(user=self.user)
    if status_filter:
        queryset = queryset.filter(processing_status=status_filter)

    total_count = queryset.count()

    page_end = offset + limit
    newest_first = queryset.order_by('-created_at')
    page = list(newest_first[offset:page_end])

    return {
        'archives': page,
        'total_count': total_count,
        'has_more': page_end < total_count
    }
|
533
|
+
|
534
|
+
def delete_archive(self, archive_id: str) -> bool:
    """Delete the current user's archive with this id.

    Returns:
        True when the archive existed and ``delete()`` was called on it,
        False when this user has no archive with that id.
    """
    try:
        target = DocumentArchive.objects.get(id=archive_id, user=self.user)
    except DocumentArchive.DoesNotExist:
        return False

    target.delete()
    return True
|