django-cfg 1.1.82__py3-none-any.whl → 1.2.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- django_cfg/__init__.py +20 -448
- django_cfg/apps/accounts/README.md +3 -3
- django_cfg/apps/accounts/admin/__init__.py +0 -2
- django_cfg/apps/accounts/admin/activity.py +2 -9
- django_cfg/apps/accounts/admin/filters.py +0 -42
- django_cfg/apps/accounts/admin/inlines.py +8 -8
- django_cfg/apps/accounts/admin/otp.py +5 -5
- django_cfg/apps/accounts/admin/registration_source.py +1 -8
- django_cfg/apps/accounts/admin/user.py +12 -20
- django_cfg/apps/accounts/managers/user_manager.py +2 -129
- django_cfg/apps/accounts/migrations/0006_remove_twilioresponse_otp_secret_and_more.py +46 -0
- django_cfg/apps/accounts/models.py +3 -123
- django_cfg/apps/accounts/serializers/otp.py +40 -44
- django_cfg/apps/accounts/serializers/profile.py +0 -2
- django_cfg/apps/accounts/services/otp_service.py +98 -186
- django_cfg/apps/accounts/signals.py +25 -15
- django_cfg/apps/accounts/utils/auth_email_service.py +84 -0
- django_cfg/apps/accounts/views/otp.py +35 -36
- django_cfg/apps/agents/README.md +129 -0
- django_cfg/apps/agents/__init__.py +68 -0
- django_cfg/apps/agents/admin/__init__.py +17 -0
- django_cfg/apps/agents/admin/execution_admin.py +460 -0
- django_cfg/apps/agents/admin/registry_admin.py +360 -0
- django_cfg/apps/agents/admin/toolsets_admin.py +482 -0
- django_cfg/apps/agents/apps.py +29 -0
- django_cfg/apps/agents/core/__init__.py +20 -0
- django_cfg/apps/agents/core/agent.py +281 -0
- django_cfg/apps/agents/core/dependencies.py +154 -0
- django_cfg/apps/agents/core/exceptions.py +66 -0
- django_cfg/apps/agents/core/models.py +106 -0
- django_cfg/apps/agents/core/orchestrator.py +391 -0
- django_cfg/apps/agents/examples/__init__.py +3 -0
- django_cfg/apps/agents/examples/simple_example.py +161 -0
- django_cfg/apps/agents/integration/__init__.py +14 -0
- django_cfg/apps/agents/integration/middleware.py +80 -0
- django_cfg/apps/agents/integration/registry.py +345 -0
- django_cfg/apps/agents/integration/signals.py +50 -0
- django_cfg/apps/agents/management/__init__.py +3 -0
- django_cfg/apps/agents/management/commands/__init__.py +3 -0
- django_cfg/apps/agents/management/commands/create_agent.py +365 -0
- django_cfg/apps/agents/management/commands/orchestrator_status.py +191 -0
- django_cfg/apps/agents/managers/__init__.py +23 -0
- django_cfg/apps/agents/managers/execution.py +236 -0
- django_cfg/apps/agents/managers/registry.py +254 -0
- django_cfg/apps/agents/managers/toolsets.py +496 -0
- django_cfg/apps/agents/migrations/0001_initial.py +286 -0
- django_cfg/apps/agents/migrations/__init__.py +5 -0
- django_cfg/apps/agents/models/__init__.py +15 -0
- django_cfg/apps/agents/models/execution.py +215 -0
- django_cfg/apps/agents/models/registry.py +220 -0
- django_cfg/apps/agents/models/toolsets.py +305 -0
- django_cfg/apps/agents/patterns/__init__.py +24 -0
- django_cfg/apps/agents/patterns/content_agents.py +234 -0
- django_cfg/apps/agents/toolsets/__init__.py +15 -0
- django_cfg/apps/agents/toolsets/cache_toolset.py +285 -0
- django_cfg/apps/agents/toolsets/django_toolset.py +220 -0
- django_cfg/apps/agents/toolsets/file_toolset.py +324 -0
- django_cfg/apps/agents/toolsets/orm_toolset.py +319 -0
- django_cfg/apps/agents/urls.py +46 -0
- django_cfg/apps/knowbase/README.md +150 -0
- django_cfg/apps/knowbase/__init__.py +27 -0
- django_cfg/apps/knowbase/admin/__init__.py +23 -0
- django_cfg/apps/knowbase/admin/archive_admin.py +857 -0
- django_cfg/apps/knowbase/admin/chat_admin.py +386 -0
- django_cfg/apps/knowbase/admin/document_admin.py +650 -0
- django_cfg/apps/knowbase/admin/external_data_admin.py +685 -0
- django_cfg/apps/knowbase/apps.py +81 -0
- django_cfg/apps/knowbase/config/README.md +176 -0
- django_cfg/apps/knowbase/config/__init__.py +51 -0
- django_cfg/apps/knowbase/config/constance_fields.py +186 -0
- django_cfg/apps/knowbase/config/constance_settings.py +200 -0
- django_cfg/apps/knowbase/config/settings.py +450 -0
- django_cfg/apps/knowbase/examples/__init__.py +3 -0
- django_cfg/apps/knowbase/examples/external_data_usage.py +191 -0
- django_cfg/apps/knowbase/management/__init__.py +0 -0
- django_cfg/apps/knowbase/management/commands/__init__.py +0 -0
- django_cfg/apps/knowbase/management/commands/knowbase_stats.py +158 -0
- django_cfg/apps/knowbase/management/commands/setup_knowbase.py +59 -0
- django_cfg/apps/knowbase/managers/__init__.py +22 -0
- django_cfg/apps/knowbase/managers/archive.py +426 -0
- django_cfg/apps/knowbase/managers/base.py +32 -0
- django_cfg/apps/knowbase/managers/chat.py +141 -0
- django_cfg/apps/knowbase/managers/document.py +203 -0
- django_cfg/apps/knowbase/managers/external_data.py +471 -0
- django_cfg/apps/knowbase/migrations/0001_initial.py +427 -0
- django_cfg/apps/knowbase/migrations/0002_archiveitem_archiveitemchunk_documentarchive_and_more.py +434 -0
- django_cfg/apps/knowbase/migrations/__init__.py +5 -0
- django_cfg/apps/knowbase/mixins/__init__.py +15 -0
- django_cfg/apps/knowbase/mixins/config.py +108 -0
- django_cfg/apps/knowbase/mixins/creator.py +81 -0
- django_cfg/apps/knowbase/mixins/examples/vehicle_model_example.py +199 -0
- django_cfg/apps/knowbase/mixins/external_data_mixin.py +813 -0
- django_cfg/apps/knowbase/mixins/service.py +362 -0
- django_cfg/apps/knowbase/models/__init__.py +41 -0
- django_cfg/apps/knowbase/models/archive.py +599 -0
- django_cfg/apps/knowbase/models/base.py +58 -0
- django_cfg/apps/knowbase/models/chat.py +157 -0
- django_cfg/apps/knowbase/models/document.py +267 -0
- django_cfg/apps/knowbase/models/external_data.py +376 -0
- django_cfg/apps/knowbase/serializers/__init__.py +68 -0
- django_cfg/apps/knowbase/serializers/archive_serializers.py +386 -0
- django_cfg/apps/knowbase/serializers/chat_serializers.py +137 -0
- django_cfg/apps/knowbase/serializers/document_serializers.py +94 -0
- django_cfg/apps/knowbase/serializers/external_data_serializers.py +256 -0
- django_cfg/apps/knowbase/serializers/public_serializers.py +74 -0
- django_cfg/apps/knowbase/services/__init__.py +40 -0
- django_cfg/apps/knowbase/services/archive/__init__.py +42 -0
- django_cfg/apps/knowbase/services/archive/archive_service.py +541 -0
- django_cfg/apps/knowbase/services/archive/chunking_service.py +791 -0
- django_cfg/apps/knowbase/services/archive/exceptions.py +52 -0
- django_cfg/apps/knowbase/services/archive/extraction_service.py +508 -0
- django_cfg/apps/knowbase/services/archive/vectorization_service.py +362 -0
- django_cfg/apps/knowbase/services/base.py +53 -0
- django_cfg/apps/knowbase/services/chat_service.py +239 -0
- django_cfg/apps/knowbase/services/document_service.py +144 -0
- django_cfg/apps/knowbase/services/embedding/__init__.py +43 -0
- django_cfg/apps/knowbase/services/embedding/async_processor.py +244 -0
- django_cfg/apps/knowbase/services/embedding/batch_processor.py +250 -0
- django_cfg/apps/knowbase/services/embedding/batch_result.py +61 -0
- django_cfg/apps/knowbase/services/embedding/models.py +229 -0
- django_cfg/apps/knowbase/services/embedding/processors.py +148 -0
- django_cfg/apps/knowbase/services/embedding/utils.py +176 -0
- django_cfg/apps/knowbase/services/prompt_builder.py +191 -0
- django_cfg/apps/knowbase/services/search_service.py +293 -0
- django_cfg/apps/knowbase/signals/__init__.py +21 -0
- django_cfg/apps/knowbase/signals/archive_signals.py +211 -0
- django_cfg/apps/knowbase/signals/chat_signals.py +37 -0
- django_cfg/apps/knowbase/signals/document_signals.py +143 -0
- django_cfg/apps/knowbase/signals/external_data_signals.py +157 -0
- django_cfg/apps/knowbase/tasks/__init__.py +39 -0
- django_cfg/apps/knowbase/tasks/archive_tasks.py +316 -0
- django_cfg/apps/knowbase/tasks/document_processing.py +341 -0
- django_cfg/apps/knowbase/tasks/external_data_tasks.py +341 -0
- django_cfg/apps/knowbase/tasks/maintenance.py +195 -0
- django_cfg/apps/knowbase/urls.py +43 -0
- django_cfg/apps/knowbase/utils/__init__.py +12 -0
- django_cfg/apps/knowbase/utils/chunk_settings.py +261 -0
- django_cfg/apps/knowbase/utils/text_processing.py +375 -0
- django_cfg/apps/knowbase/utils/validation.py +99 -0
- django_cfg/apps/knowbase/views/__init__.py +28 -0
- django_cfg/apps/knowbase/views/archive_views.py +469 -0
- django_cfg/apps/knowbase/views/base.py +49 -0
- django_cfg/apps/knowbase/views/chat_views.py +181 -0
- django_cfg/apps/knowbase/views/document_views.py +183 -0
- django_cfg/apps/knowbase/views/public_views.py +129 -0
- django_cfg/apps/leads/admin.py +70 -0
- django_cfg/apps/newsletter/admin.py +234 -0
- django_cfg/apps/newsletter/admin_filters.py +124 -0
- django_cfg/apps/support/admin.py +196 -0
- django_cfg/apps/support/admin_filters.py +71 -0
- django_cfg/apps/support/templates/support/chat/ticket_chat.html +1 -1
- django_cfg/apps/urls.py +5 -4
- django_cfg/cli/README.md +1 -1
- django_cfg/cli/commands/create_project.py +2 -2
- django_cfg/cli/commands/info.py +1 -1
- django_cfg/config.py +44 -0
- django_cfg/core/config.py +29 -82
- django_cfg/core/environment.py +1 -1
- django_cfg/core/generation.py +19 -107
- django_cfg/{integration.py → core/integration.py} +18 -16
- django_cfg/core/validation.py +1 -1
- django_cfg/management/__init__.py +1 -1
- django_cfg/management/commands/__init__.py +1 -1
- django_cfg/management/commands/auto_generate.py +482 -0
- django_cfg/management/commands/migrator.py +19 -101
- django_cfg/management/commands/test_email.py +1 -1
- django_cfg/middleware/README.md +0 -158
- django_cfg/middleware/__init__.py +0 -2
- django_cfg/middleware/user_activity.py +3 -3
- django_cfg/models/api.py +145 -0
- django_cfg/models/base.py +287 -0
- django_cfg/models/cache.py +4 -4
- django_cfg/models/constance.py +25 -88
- django_cfg/models/database.py +9 -9
- django_cfg/models/drf.py +3 -36
- django_cfg/models/email.py +163 -0
- django_cfg/models/environment.py +276 -0
- django_cfg/models/limits.py +1 -1
- django_cfg/models/logging.py +366 -0
- django_cfg/models/revolution.py +41 -2
- django_cfg/models/security.py +125 -0
- django_cfg/models/services.py +1 -1
- django_cfg/modules/__init__.py +2 -56
- django_cfg/modules/base.py +78 -52
- django_cfg/modules/django_currency/service.py +2 -2
- django_cfg/modules/django_email.py +2 -2
- django_cfg/modules/django_health.py +267 -0
- django_cfg/modules/django_llm/llm/client.py +91 -19
- django_cfg/modules/django_llm/translator/translator.py +2 -2
- django_cfg/modules/django_logger.py +2 -2
- django_cfg/modules/django_ngrok.py +2 -2
- django_cfg/modules/django_tasks.py +68 -3
- django_cfg/modules/django_telegram.py +3 -3
- django_cfg/modules/django_twilio/sendgrid_service.py +2 -2
- django_cfg/modules/django_twilio/service.py +2 -2
- django_cfg/modules/django_twilio/simple_service.py +2 -2
- django_cfg/modules/django_twilio/twilio_service.py +2 -2
- django_cfg/modules/django_unfold/__init__.py +69 -0
- django_cfg/modules/{unfold → django_unfold}/callbacks.py +23 -22
- django_cfg/modules/django_unfold/dashboard.py +278 -0
- django_cfg/modules/django_unfold/icons/README.md +145 -0
- django_cfg/modules/django_unfold/icons/__init__.py +12 -0
- django_cfg/modules/django_unfold/icons/constants.py +2851 -0
- django_cfg/modules/django_unfold/icons/generate_icons.py +486 -0
- django_cfg/modules/django_unfold/models/__init__.py +42 -0
- django_cfg/modules/django_unfold/models/config.py +601 -0
- django_cfg/modules/django_unfold/models/dashboard.py +206 -0
- django_cfg/modules/django_unfold/models/dropdown.py +40 -0
- django_cfg/modules/django_unfold/models/navigation.py +73 -0
- django_cfg/modules/django_unfold/models/tabs.py +25 -0
- django_cfg/modules/{unfold → django_unfold}/system_monitor.py +2 -2
- django_cfg/modules/django_unfold/utils.py +140 -0
- django_cfg/registry/__init__.py +23 -0
- django_cfg/registry/core.py +61 -0
- django_cfg/registry/exceptions.py +11 -0
- django_cfg/registry/modules.py +12 -0
- django_cfg/registry/services.py +26 -0
- django_cfg/registry/third_party.py +52 -0
- django_cfg/routing/__init__.py +19 -0
- django_cfg/routing/callbacks.py +198 -0
- django_cfg/routing/routers.py +48 -0
- django_cfg/templates/admin/layouts/dashboard_with_tabs.html +8 -9
- django_cfg/templatetags/__init__.py +0 -0
- django_cfg/templatetags/django_cfg.py +33 -0
- django_cfg/urls.py +33 -0
- django_cfg/utils/path_resolution.py +1 -1
- django_cfg/utils/smart_defaults.py +7 -61
- django_cfg/utils/toolkit.py +663 -0
- {django_cfg-1.1.82.dist-info → django_cfg-1.2.1.dist-info}/METADATA +83 -86
- django_cfg-1.2.1.dist-info/RECORD +441 -0
- django_cfg/archive/django_sample.zip +0 -0
- django_cfg/models/unfold.py +0 -271
- django_cfg/modules/unfold/__init__.py +0 -29
- django_cfg/modules/unfold/dashboard.py +0 -318
- django_cfg/pyproject.toml +0 -370
- django_cfg/routers.py +0 -83
- django_cfg-1.1.82.dist-info/RECORD +0 -278
- /django_cfg/{exceptions.py → core/exceptions.py} +0 -0
- /django_cfg/modules/{unfold → django_unfold}/models.py +0 -0
- /django_cfg/modules/{unfold → django_unfold}/tailwind.py +0 -0
- /django_cfg/{version_check.py → utils/version_check.py} +0 -0
- {django_cfg-1.1.82.dist-info → django_cfg-1.2.1.dist-info}/WHEEL +0 -0
- {django_cfg-1.1.82.dist-info → django_cfg-1.2.1.dist-info}/entry_points.txt +0 -0
- {django_cfg-1.1.82.dist-info → django_cfg-1.2.1.dist-info}/licenses/LICENSE +0 -0
@@ -0,0 +1,362 @@
|
|
1
|
+
"""
|
2
|
+
Service wrapper for ExternalData operations using the mixin system.
|
3
|
+
|
4
|
+
This provides a service-like interface for backward compatibility while
|
5
|
+
using the new mixin-based architecture internally.
|
6
|
+
"""
|
7
|
+
|
8
|
+
import logging
|
9
|
+
from typing import List, Dict, Any, Optional
|
10
|
+
from django.db import models, transaction
|
11
|
+
from django.utils import timezone
|
12
|
+
from pgvector.django import CosineDistance
|
13
|
+
|
14
|
+
from django_cfg.modules.django_llm.llm.client import LLMClient
|
15
|
+
from ..config.settings import get_openai_api_key, get_cache_settings
|
16
|
+
from ..utils.validation import safe_float, validate_similarity_score
|
17
|
+
from ..models.external_data import ExternalData, ExternalDataChunk, ExternalDataStatus
|
18
|
+
from ..services.base import BaseService
|
19
|
+
from ..services.embedding import process_external_data_chunks_optimized
|
20
|
+
from .creator import ExternalDataCreator
|
21
|
+
from .config import ExternalDataConfig
|
22
|
+
|
23
|
+
# Module-level logger, named after this module's import path via __name__.
logger = logging.getLogger(__name__)
|
24
|
+
|
25
|
+
|
26
|
+
class ExternalDataService(BaseService):
    """
    Service for managing external data sources within django_cfg.apps.knowbase.

    This service provides backward compatibility with the old ExternalDataService
    while using the new mixin-based architecture internally.

    Public methods report failures through their return value (an error dict,
    an empty list, or zeroed statistics) rather than by raising.
    """

    def __init__(self, user):
        """
        Build the service for ``user`` and create the LLM client used for embeddings.

        Args:
            user: Passed to ``BaseService.__init__``; the methods below read it
                back as ``self.user`` (presumably set by the base class --
                confirm against ``BaseService``).
        """
        super().__init__(user)
        cache_settings = get_cache_settings()
        self.llm_client = LLMClient(
            apikey_openai=get_openai_api_key(),
            cache_dir=cache_settings.cache_dir,
            cache_ttl=cache_settings.cache_ttl,
            max_cache_size=cache_settings.max_cache_size,
        )

    def create_external_data(
        self,
        title: str,
        source_type: str,
        source_identifier: str,
        content: str,
        description: str = "",
        source_config: Optional[Dict[str, Any]] = None,
        metadata: Optional[Dict[str, Any]] = None,
        tags: Optional[List[str]] = None,
        similarity_threshold: float = 0.5,
        is_active: bool = True,
        is_public: bool = False
    ) -> Dict[str, Any]:
        """
        Create external data using the new mixin system.

        This method provides backward compatibility with the old service API.
        The arguments are packed into an ``ExternalDataConfig`` and handed to
        ``ExternalDataCreator.create_from_config``.

        Args:
            title: Title stored in the config.
            source_type: Source type stored in the config.
            source_identifier: Source identifier stored in the config.
            content: Content stored in the config.
            description: Optional description.
            source_config: Optional source configuration; ``None`` becomes ``{}``.
            metadata: Optional metadata; ``None`` becomes ``{}``.
            tags: Optional tags; ``None`` becomes ``[]``.
            similarity_threshold: Similarity threshold stored in the config.
            is_active: Active flag stored in the config.
            is_public: Public flag stored in the config.

        Returns:
            dict: ``{'success': True, 'external_data': ..., 'message': ...}`` when
            the creator reports success, otherwise ``{'success': False, 'error': ...}``.
        """
        try:
            # Create configuration
            config = ExternalDataConfig(
                title=title,
                description=description,
                source_type=source_type,
                source_identifier=source_identifier,
                content=content,
                similarity_threshold=similarity_threshold,
                is_active=is_active,
                is_public=is_public,
                metadata=metadata or {},
                source_config=source_config or {},
                tags=tags or []
            )

            # Create using the new creator
            creator = ExternalDataCreator(self.user)
            result = creator.create_from_config(config)

            if result['success']:
                return {
                    'success': True,
                    'external_data': result['external_data'],
                    'message': result['message']
                }

            return {
                'success': False,
                'error': result['error']
            }

        except Exception as e:
            logger.exception(f"Error creating external data: {e}")
            return {
                'success': False,
                'error': str(e)
            }

    def vectorize_external_data(self, external_data_id) -> Dict[str, Any]:
        """
        Vectorize external data content into chunks.

        The chunk replacement runs inside its own ``transaction.atomic()``
        block. If anything raises, that block is rolled back first (so the
        previously stored chunks survive) and only then is the record marked
        as failed, outside the rolled-back block.

        Args:
            external_data_id: ExternalData ID or instance to vectorize. An ID is
                looked up restricted to ``self.user``; an instance is refreshed
                from the database and used as given.

        Returns:
            dict: ``{'success': True, 'processed_count': int, 'cost': ...}`` on
            success. When no chunk could be processed the record is marked
            FAILED and the result is ``{'success': False, 'error': ...,
            'processed_count': 0, 'cost': ...}``. Any exception yields
            ``{'success': False, 'error': str(exc)}``.
        """
        external_data = None
        try:
            # Get the external data object
            if hasattr(external_data_id, 'id'):
                # It's an ExternalData object. Only treat it as resolved once
                # the refresh succeeded, so the failure path below never calls
                # save() on an instance whose row no longer exists.
                external_data_id.refresh_from_db()
                external_data = external_data_id
            else:
                # It's an ID
                external_data = ExternalData.objects.get(id=external_data_id, user=self.user)

            # Exceptions must leave this block uncaught so Django can roll it
            # back; they are handled by the ``except`` clause outside of it.
            with transaction.atomic():
                # Mark as processing
                external_data.status = ExternalDataStatus.PROCESSING
                external_data.processing_error = ""
                external_data.save()

                # Clear existing chunks
                external_data.chunks.all().delete()

                # Nothing to embed: finish immediately
                if not external_data.content.strip():
                    external_data.status = ExternalDataStatus.COMPLETED
                    external_data.processed_at = timezone.now()
                    external_data.save()
                    return {
                        'success': True,
                        'processed_count': 0,
                        'cost': 0.0
                    }

                # Use the existing chunking and embedding logic
                result = process_external_data_chunks_optimized(
                    external_data=external_data,
                    llm_client=self.llm_client
                )

                if result.successful_chunks:
                    external_data.status = ExternalDataStatus.COMPLETED
                    external_data.processed_at = timezone.now()
                    external_data.processing_error = ""
                    external_data.save()
                    return {
                        'success': True,
                        'processed_count': len(result.successful_chunks),
                        'cost': result.total_cost
                    }

                # Processing ran but produced nothing usable: persist the
                # FAILED state and report the failure to the caller as well.
                no_chunks_error = "No chunks were successfully processed"
                external_data.status = ExternalDataStatus.FAILED
                external_data.processing_error = no_chunks_error
                external_data.save()
                return {
                    'success': False,
                    'error': no_chunks_error,
                    'processed_count': 0,
                    'cost': result.total_cost
                }

        except Exception as e:
            logger.exception(f"Failed to vectorize external data: {e}")
            if external_data is not None:
                self._mark_vectorization_failed(external_data, str(e))
            return {
                'success': False,
                'error': str(e)
            }

    def _mark_vectorization_failed(self, external_data, error_message: str) -> None:
        """Persist the FAILED status on ``external_data``; log instead of raising."""
        try:
            # Reload first: the in-memory instance may carry values from the
            # work that was just rolled back.
            external_data.refresh_from_db()
            external_data.status = ExternalDataStatus.FAILED
            external_data.processing_error = error_message
            external_data.save()
        except Exception:
            logger.exception("Could not record vectorization failure for external data")

    def search_external_data(
        self,
        query: str,
        limit: int = 5,
        threshold: Optional[float] = None,
        source_types: Optional[List[str]] = None,
        source_identifiers: Optional[List[str]] = None
    ) -> List[Dict[str, Any]]:
        """
        Search external data using semantic similarity.

        Only chunks that belong to active external data of ``self.user`` and
        have an embedding are considered.

        Args:
            query: Search query
            limit: Maximum number of results
            threshold: Similarity threshold (uses per-object thresholds if None)
            source_types: Filter by source types
            source_identifiers: Filter by source identifiers

        Returns:
            List of search results with similarity scores, best match first.
            Each result's ``metadata`` holds the built-in keys
            (``external_data_id``, ``source_type``, ``source_identifier``,
            ``chunk_index``) followed by the stored metadata, so stored keys
            win on a name clash. Returns ``[]`` for a blank query, a
            non-positive limit, or any error.
        """
        try:
            # Nothing can match: skip the embedding request entirely.
            if limit <= 0 or not query or not query.strip():
                return []

            # Generate query embedding
            query_embedding = self.llm_client.generate_embedding(query)

            # Build query
            chunks_query = ExternalDataChunk.objects.filter(
                external_data__user=self.user,
                external_data__is_active=True,
                embedding__isnull=False
            ).select_related('external_data')

            # Apply filters
            if source_types:
                chunks_query = chunks_query.filter(external_data__source_type__in=source_types)

            if source_identifiers:
                chunks_query = chunks_query.filter(external_data__source_identifier__in=source_identifiers)

            # Calculate similarity and order by it
            chunks_with_similarity = chunks_query.annotate(
                similarity=1 - CosineDistance('embedding', query_embedding.embedding)
            ).order_by('-similarity')[:limit * 2]  # Get more to filter by threshold

            # Filter by threshold and format results
            results = []
            for chunk in chunks_with_similarity:
                source = chunk.external_data
                similarity_value = safe_float(chunk.similarity, 0.0)

                # Use per-object threshold if no global threshold provided
                object_threshold = threshold if threshold is not None else source.similarity_threshold
                if similarity_value < object_threshold:
                    continue

                results.append({
                    'type': 'external_data',
                    'chunk': chunk,
                    'similarity': similarity_value,
                    'source_title': source.title,
                    'content': chunk.content,
                    'metadata': {
                        'external_data_id': str(source.id),
                        'source_type': source.source_type,
                        'source_identifier': source.source_identifier,
                        'chunk_index': chunk.chunk_index,
                        # A missing (None) metadata value must not abort the search.
                        **(source.metadata or {})
                    }
                })

                if len(results) >= limit:
                    break

            return results

        except Exception as e:
            logger.exception(f"Error searching external data: {e}")
            return []

    def get_external_data_stats(self) -> Dict[str, Any]:
        """
        Get statistics about external data for the user.

        Returns:
            dict with ``total_external_data``, ``by_status`` (keyed by each
            ``ExternalDataStatus`` member), ``by_source_type`` (keyed by each
            distinct source type), ``total_chunks``, ``total_tokens`` and
            ``total_cost``. On any error the same keys are returned with zero
            or empty values.
        """
        try:
            queryset = ExternalData.objects.filter(user=self.user)

            stats = {
                'total_external_data': queryset.count(),
                'by_status': {},
                'by_source_type': {},
                'total_chunks': 0,
                'total_tokens': 0,
                'total_cost': 0.0
            }

            # Status breakdown
            for status in ExternalDataStatus:
                count = queryset.filter(status=status).count()
                stats['by_status'][status] = count

            # Source type breakdown
            source_types = queryset.values_list('source_type', flat=True).distinct()
            for source_type in source_types:
                count = queryset.filter(source_type=source_type).count()
                stats['by_source_type'][source_type] = count

            # Aggregate statistics
            aggregates = queryset.aggregate(
                total_chunks=models.Sum('total_chunks'),
                total_tokens=models.Sum('total_tokens'),
                total_cost=models.Sum('processing_cost')
            )

            # Sum() yields None when there are no rows, hence the fallbacks.
            stats.update({
                'total_chunks': aggregates['total_chunks'] or 0,
                'total_tokens': aggregates['total_tokens'] or 0,
                'total_cost': float(aggregates['total_cost'] or 0.0)
            })

            return stats

        except Exception as e:
            logger.exception(f"Error getting external data stats: {e}")
            return {
                'total_external_data': 0,
                'by_status': {},
                'by_source_type': {},
                'total_chunks': 0,
                'total_tokens': 0,
                'total_cost': 0.0
            }

    def delete_external_data(self, external_data_id) -> Dict[str, Any]:
        """
        Delete external data and all associated chunks.

        Args:
            external_data_id: ID of the record; the lookup is restricted to
                ``self.user``.

        Returns:
            dict: ``{'success': True, 'message': ...}`` on success, otherwise
            ``{'success': False, 'error': ...}``.
        """
        try:
            external_data = ExternalData.objects.get(id=external_data_id, user=self.user)
            title = external_data.title
            external_data.delete()

            return {
                'success': True,
                'message': f"External data '{title}' deleted successfully"
            }

        except ExternalData.DoesNotExist:
            return {
                'success': False,
                'error': "External data not found"
            }
        except Exception as e:
            logger.exception(f"Error deleting external data: {e}")
            return {
                'success': False,
                'error': str(e)
            }

    def bulk_vectorize_pending(self) -> Dict[str, Any]:
        """
        Vectorize all pending external data for the user.

        Returns:
            dict: ``{'success': True, 'stats': {...}}`` where ``stats`` has
            ``total``, ``processed``, ``failed`` and ``total_cost``; or
            ``{'success': False, 'error': ...}`` if the run itself fails.
        """
        try:
            # Fetch once so that 'total' always matches the rows processed below.
            pending_items = list(
                ExternalData.objects.filter(
                    user=self.user,
                    status=ExternalDataStatus.PENDING
                )
            )

            stats = {
                'total': len(pending_items),
                'processed': 0,
                'failed': 0,
                'total_cost': 0.0
            }

            for external_data in pending_items:
                result = self.vectorize_external_data(external_data)
                if result.get('success', False):
                    stats['processed'] += 1
                    stats['total_cost'] += result.get('cost', 0.0)
                else:
                    stats['failed'] += 1

            return {
                'success': True,
                'stats': stats
            }

        except Exception as e:
            logger.exception(f"Error in bulk vectorization: {e}")
            return {
                'success': False,
                'error': str(e)
            }
|
@@ -0,0 +1,41 @@
|
|
1
|
+
"""
|
2
|
+
Knowledge Base Models
|
3
|
+
|
4
|
+
Comprehensive models for RAG-powered knowledge management system.
|
5
|
+
"""
|
6
|
+
|
7
|
+
from .base import *
|
8
|
+
from .document import *
|
9
|
+
from .chat import *
|
10
|
+
from .archive import *
|
11
|
+
from .external_data import *
|
12
|
+
|
13
|
+
# Public names of this package, listed group by group in export order.
__all__ = [
    # Base models
    'ProcessingStatus', 'TimestampedModel', 'UserScopedModel',

    # Document models
    'DocumentCategory', 'Document', 'DocumentChunk',

    # Archive models
    'ArchiveType', 'ContentType', 'ChunkType',
    'DocumentArchive', 'ArchiveItem', 'ArchiveItemChunk',

    # Chat models
    'ChatSession', 'ChatMessage',

    # External Data models
    'ExternalDataType', 'ExternalDataStatus',
    'ExternalData', 'ExternalDataChunk',
]
|