django-cfg 1.1.82__py3-none-any.whl → 1.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- django_cfg/__init__.py +20 -448
- django_cfg/apps/accounts/README.md +3 -3
- django_cfg/apps/accounts/admin/__init__.py +0 -2
- django_cfg/apps/accounts/admin/activity.py +2 -9
- django_cfg/apps/accounts/admin/filters.py +0 -42
- django_cfg/apps/accounts/admin/inlines.py +8 -8
- django_cfg/apps/accounts/admin/otp.py +5 -5
- django_cfg/apps/accounts/admin/registration_source.py +1 -8
- django_cfg/apps/accounts/admin/user.py +12 -20
- django_cfg/apps/accounts/managers/user_manager.py +2 -129
- django_cfg/apps/accounts/migrations/0006_remove_twilioresponse_otp_secret_and_more.py +46 -0
- django_cfg/apps/accounts/models.py +3 -123
- django_cfg/apps/accounts/serializers/otp.py +40 -44
- django_cfg/apps/accounts/serializers/profile.py +0 -2
- django_cfg/apps/accounts/services/otp_service.py +98 -186
- django_cfg/apps/accounts/signals.py +25 -15
- django_cfg/apps/accounts/utils/auth_email_service.py +84 -0
- django_cfg/apps/accounts/views/otp.py +35 -36
- django_cfg/apps/agents/README.md +129 -0
- django_cfg/apps/agents/__init__.py +68 -0
- django_cfg/apps/agents/admin/__init__.py +17 -0
- django_cfg/apps/agents/admin/execution_admin.py +460 -0
- django_cfg/apps/agents/admin/registry_admin.py +360 -0
- django_cfg/apps/agents/admin/toolsets_admin.py +482 -0
- django_cfg/apps/agents/apps.py +29 -0
- django_cfg/apps/agents/core/__init__.py +20 -0
- django_cfg/apps/agents/core/agent.py +281 -0
- django_cfg/apps/agents/core/dependencies.py +154 -0
- django_cfg/apps/agents/core/exceptions.py +66 -0
- django_cfg/apps/agents/core/models.py +106 -0
- django_cfg/apps/agents/core/orchestrator.py +391 -0
- django_cfg/apps/agents/examples/__init__.py +3 -0
- django_cfg/apps/agents/examples/simple_example.py +161 -0
- django_cfg/apps/agents/integration/__init__.py +14 -0
- django_cfg/apps/agents/integration/middleware.py +80 -0
- django_cfg/apps/agents/integration/registry.py +345 -0
- django_cfg/apps/agents/integration/signals.py +50 -0
- django_cfg/apps/agents/management/__init__.py +3 -0
- django_cfg/apps/agents/management/commands/__init__.py +3 -0
- django_cfg/apps/agents/management/commands/create_agent.py +365 -0
- django_cfg/apps/agents/management/commands/orchestrator_status.py +191 -0
- django_cfg/apps/agents/managers/__init__.py +23 -0
- django_cfg/apps/agents/managers/execution.py +236 -0
- django_cfg/apps/agents/managers/registry.py +254 -0
- django_cfg/apps/agents/managers/toolsets.py +496 -0
- django_cfg/apps/agents/migrations/0001_initial.py +286 -0
- django_cfg/apps/agents/migrations/__init__.py +5 -0
- django_cfg/apps/agents/models/__init__.py +15 -0
- django_cfg/apps/agents/models/execution.py +215 -0
- django_cfg/apps/agents/models/registry.py +220 -0
- django_cfg/apps/agents/models/toolsets.py +305 -0
- django_cfg/apps/agents/patterns/__init__.py +24 -0
- django_cfg/apps/agents/patterns/content_agents.py +234 -0
- django_cfg/apps/agents/toolsets/__init__.py +15 -0
- django_cfg/apps/agents/toolsets/cache_toolset.py +285 -0
- django_cfg/apps/agents/toolsets/django_toolset.py +220 -0
- django_cfg/apps/agents/toolsets/file_toolset.py +324 -0
- django_cfg/apps/agents/toolsets/orm_toolset.py +319 -0
- django_cfg/apps/agents/urls.py +46 -0
- django_cfg/apps/knowbase/README.md +150 -0
- django_cfg/apps/knowbase/__init__.py +27 -0
- django_cfg/apps/knowbase/admin/__init__.py +23 -0
- django_cfg/apps/knowbase/admin/archive_admin.py +857 -0
- django_cfg/apps/knowbase/admin/chat_admin.py +386 -0
- django_cfg/apps/knowbase/admin/document_admin.py +650 -0
- django_cfg/apps/knowbase/admin/external_data_admin.py +685 -0
- django_cfg/apps/knowbase/apps.py +81 -0
- django_cfg/apps/knowbase/config/README.md +176 -0
- django_cfg/apps/knowbase/config/__init__.py +51 -0
- django_cfg/apps/knowbase/config/constance_fields.py +186 -0
- django_cfg/apps/knowbase/config/constance_settings.py +200 -0
- django_cfg/apps/knowbase/config/settings.py +444 -0
- django_cfg/apps/knowbase/examples/__init__.py +3 -0
- django_cfg/apps/knowbase/examples/external_data_usage.py +191 -0
- django_cfg/apps/knowbase/management/__init__.py +0 -0
- django_cfg/apps/knowbase/management/commands/__init__.py +0 -0
- django_cfg/apps/knowbase/management/commands/knowbase_stats.py +158 -0
- django_cfg/apps/knowbase/management/commands/setup_knowbase.py +59 -0
- django_cfg/apps/knowbase/managers/__init__.py +22 -0
- django_cfg/apps/knowbase/managers/archive.py +426 -0
- django_cfg/apps/knowbase/managers/base.py +32 -0
- django_cfg/apps/knowbase/managers/chat.py +141 -0
- django_cfg/apps/knowbase/managers/document.py +203 -0
- django_cfg/apps/knowbase/managers/external_data.py +471 -0
- django_cfg/apps/knowbase/migrations/0001_initial.py +427 -0
- django_cfg/apps/knowbase/migrations/0002_archiveitem_archiveitemchunk_documentarchive_and_more.py +434 -0
- django_cfg/apps/knowbase/migrations/__init__.py +5 -0
- django_cfg/apps/knowbase/mixins/__init__.py +15 -0
- django_cfg/apps/knowbase/mixins/config.py +108 -0
- django_cfg/apps/knowbase/mixins/creator.py +81 -0
- django_cfg/apps/knowbase/mixins/examples/vehicle_model_example.py +199 -0
- django_cfg/apps/knowbase/mixins/external_data_mixin.py +813 -0
- django_cfg/apps/knowbase/mixins/service.py +362 -0
- django_cfg/apps/knowbase/models/__init__.py +41 -0
- django_cfg/apps/knowbase/models/archive.py +599 -0
- django_cfg/apps/knowbase/models/base.py +58 -0
- django_cfg/apps/knowbase/models/chat.py +157 -0
- django_cfg/apps/knowbase/models/document.py +267 -0
- django_cfg/apps/knowbase/models/external_data.py +376 -0
- django_cfg/apps/knowbase/serializers/__init__.py +68 -0
- django_cfg/apps/knowbase/serializers/archive_serializers.py +386 -0
- django_cfg/apps/knowbase/serializers/chat_serializers.py +137 -0
- django_cfg/apps/knowbase/serializers/document_serializers.py +94 -0
- django_cfg/apps/knowbase/serializers/external_data_serializers.py +256 -0
- django_cfg/apps/knowbase/serializers/public_serializers.py +74 -0
- django_cfg/apps/knowbase/services/__init__.py +40 -0
- django_cfg/apps/knowbase/services/archive/__init__.py +42 -0
- django_cfg/apps/knowbase/services/archive/archive_service.py +541 -0
- django_cfg/apps/knowbase/services/archive/chunking_service.py +791 -0
- django_cfg/apps/knowbase/services/archive/exceptions.py +52 -0
- django_cfg/apps/knowbase/services/archive/extraction_service.py +508 -0
- django_cfg/apps/knowbase/services/archive/vectorization_service.py +362 -0
- django_cfg/apps/knowbase/services/base.py +53 -0
- django_cfg/apps/knowbase/services/chat_service.py +239 -0
- django_cfg/apps/knowbase/services/document_service.py +144 -0
- django_cfg/apps/knowbase/services/embedding/__init__.py +43 -0
- django_cfg/apps/knowbase/services/embedding/async_processor.py +244 -0
- django_cfg/apps/knowbase/services/embedding/batch_processor.py +250 -0
- django_cfg/apps/knowbase/services/embedding/batch_result.py +61 -0
- django_cfg/apps/knowbase/services/embedding/models.py +229 -0
- django_cfg/apps/knowbase/services/embedding/processors.py +148 -0
- django_cfg/apps/knowbase/services/embedding/utils.py +176 -0
- django_cfg/apps/knowbase/services/prompt_builder.py +191 -0
- django_cfg/apps/knowbase/services/search_service.py +293 -0
- django_cfg/apps/knowbase/signals/__init__.py +21 -0
- django_cfg/apps/knowbase/signals/archive_signals.py +211 -0
- django_cfg/apps/knowbase/signals/chat_signals.py +37 -0
- django_cfg/apps/knowbase/signals/document_signals.py +143 -0
- django_cfg/apps/knowbase/signals/external_data_signals.py +157 -0
- django_cfg/apps/knowbase/tasks/__init__.py +39 -0
- django_cfg/apps/knowbase/tasks/archive_tasks.py +316 -0
- django_cfg/apps/knowbase/tasks/document_processing.py +341 -0
- django_cfg/apps/knowbase/tasks/external_data_tasks.py +341 -0
- django_cfg/apps/knowbase/tasks/maintenance.py +195 -0
- django_cfg/apps/knowbase/urls.py +43 -0
- django_cfg/apps/knowbase/utils/__init__.py +12 -0
- django_cfg/apps/knowbase/utils/chunk_settings.py +261 -0
- django_cfg/apps/knowbase/utils/text_processing.py +375 -0
- django_cfg/apps/knowbase/utils/validation.py +99 -0
- django_cfg/apps/knowbase/views/__init__.py +28 -0
- django_cfg/apps/knowbase/views/archive_views.py +469 -0
- django_cfg/apps/knowbase/views/base.py +49 -0
- django_cfg/apps/knowbase/views/chat_views.py +181 -0
- django_cfg/apps/knowbase/views/document_views.py +183 -0
- django_cfg/apps/knowbase/views/public_views.py +129 -0
- django_cfg/apps/leads/admin.py +70 -0
- django_cfg/apps/newsletter/admin.py +234 -0
- django_cfg/apps/newsletter/admin_filters.py +124 -0
- django_cfg/apps/support/admin.py +196 -0
- django_cfg/apps/support/admin_filters.py +71 -0
- django_cfg/apps/support/templates/support/chat/ticket_chat.html +1 -1
- django_cfg/apps/urls.py +5 -4
- django_cfg/cli/README.md +1 -1
- django_cfg/cli/commands/create_project.py +2 -2
- django_cfg/cli/commands/info.py +1 -1
- django_cfg/config.py +44 -0
- django_cfg/core/config.py +29 -82
- django_cfg/core/environment.py +1 -1
- django_cfg/core/generation.py +19 -107
- django_cfg/{integration.py → core/integration.py} +18 -16
- django_cfg/core/validation.py +1 -1
- django_cfg/management/__init__.py +1 -1
- django_cfg/management/commands/__init__.py +1 -1
- django_cfg/management/commands/auto_generate.py +482 -0
- django_cfg/management/commands/migrator.py +19 -101
- django_cfg/management/commands/test_email.py +1 -1
- django_cfg/middleware/README.md +0 -158
- django_cfg/middleware/__init__.py +0 -2
- django_cfg/middleware/user_activity.py +3 -3
- django_cfg/models/api.py +145 -0
- django_cfg/models/base.py +287 -0
- django_cfg/models/cache.py +4 -4
- django_cfg/models/constance.py +25 -88
- django_cfg/models/database.py +9 -9
- django_cfg/models/drf.py +3 -36
- django_cfg/models/email.py +163 -0
- django_cfg/models/environment.py +276 -0
- django_cfg/models/limits.py +1 -1
- django_cfg/models/logging.py +366 -0
- django_cfg/models/revolution.py +41 -2
- django_cfg/models/security.py +125 -0
- django_cfg/models/services.py +1 -1
- django_cfg/modules/__init__.py +2 -56
- django_cfg/modules/base.py +78 -52
- django_cfg/modules/django_currency/service.py +2 -2
- django_cfg/modules/django_email.py +2 -2
- django_cfg/modules/django_health.py +267 -0
- django_cfg/modules/django_llm/llm/client.py +79 -17
- django_cfg/modules/django_llm/translator/translator.py +2 -2
- django_cfg/modules/django_logger.py +2 -2
- django_cfg/modules/django_ngrok.py +2 -2
- django_cfg/modules/django_tasks.py +68 -3
- django_cfg/modules/django_telegram.py +3 -3
- django_cfg/modules/django_twilio/sendgrid_service.py +2 -2
- django_cfg/modules/django_twilio/service.py +2 -2
- django_cfg/modules/django_twilio/simple_service.py +2 -2
- django_cfg/modules/django_twilio/twilio_service.py +2 -2
- django_cfg/modules/django_unfold/__init__.py +69 -0
- django_cfg/modules/{unfold → django_unfold}/callbacks.py +23 -22
- django_cfg/modules/django_unfold/dashboard.py +278 -0
- django_cfg/modules/django_unfold/icons/README.md +145 -0
- django_cfg/modules/django_unfold/icons/__init__.py +12 -0
- django_cfg/modules/django_unfold/icons/constants.py +2851 -0
- django_cfg/modules/django_unfold/icons/generate_icons.py +486 -0
- django_cfg/modules/django_unfold/models/__init__.py +42 -0
- django_cfg/modules/django_unfold/models/config.py +601 -0
- django_cfg/modules/django_unfold/models/dashboard.py +206 -0
- django_cfg/modules/django_unfold/models/dropdown.py +40 -0
- django_cfg/modules/django_unfold/models/navigation.py +73 -0
- django_cfg/modules/django_unfold/models/tabs.py +25 -0
- django_cfg/modules/{unfold → django_unfold}/system_monitor.py +2 -2
- django_cfg/modules/django_unfold/utils.py +140 -0
- django_cfg/registry/__init__.py +23 -0
- django_cfg/registry/core.py +61 -0
- django_cfg/registry/exceptions.py +11 -0
- django_cfg/registry/modules.py +12 -0
- django_cfg/registry/services.py +26 -0
- django_cfg/registry/third_party.py +52 -0
- django_cfg/routing/__init__.py +19 -0
- django_cfg/routing/callbacks.py +198 -0
- django_cfg/routing/routers.py +48 -0
- django_cfg/templates/admin/layouts/dashboard_with_tabs.html +8 -9
- django_cfg/templatetags/__init__.py +0 -0
- django_cfg/templatetags/django_cfg.py +33 -0
- django_cfg/urls.py +33 -0
- django_cfg/utils/path_resolution.py +1 -1
- django_cfg/utils/smart_defaults.py +7 -61
- django_cfg/utils/toolkit.py +663 -0
- {django_cfg-1.1.82.dist-info → django_cfg-1.2.0.dist-info}/METADATA +83 -86
- django_cfg-1.2.0.dist-info/RECORD +441 -0
- django_cfg/archive/django_sample.zip +0 -0
- django_cfg/models/unfold.py +0 -271
- django_cfg/modules/unfold/__init__.py +0 -29
- django_cfg/modules/unfold/dashboard.py +0 -318
- django_cfg/pyproject.toml +0 -370
- django_cfg/routers.py +0 -83
- django_cfg-1.1.82.dist-info/RECORD +0 -278
- /django_cfg/{exceptions.py → core/exceptions.py} +0 -0
- /django_cfg/modules/{unfold → django_unfold}/models.py +0 -0
- /django_cfg/modules/{unfold → django_unfold}/tailwind.py +0 -0
- /django_cfg/{version_check.py → utils/version_check.py} +0 -0
- {django_cfg-1.1.82.dist-info → django_cfg-1.2.0.dist-info}/WHEEL +0 -0
- {django_cfg-1.1.82.dist-info → django_cfg-1.2.0.dist-info}/entry_points.txt +0 -0
- {django_cfg-1.1.82.dist-info → django_cfg-1.2.0.dist-info}/licenses/LICENSE +0 -0
@@ -0,0 +1,471 @@
|
|
1
|
+
"""
|
2
|
+
External Data managers for advanced querying and operations.
|
3
|
+
"""
|
4
|
+
|
5
|
+
from django.db import models
|
6
|
+
from django.db.models import Count, Q, Avg, Sum
|
7
|
+
from django.utils import timezone
|
8
|
+
from typing import Optional, List, Dict, Any
|
9
|
+
from datetime import timedelta
|
10
|
+
|
11
|
+
from .base import BaseKnowbaseManager
|
12
|
+
|
13
|
+
|
14
|
+
class ExternalDataQuerySet(models.QuerySet):
    """Chainable filters and annotations for ExternalData rows."""

    def active(self):
        """Keep only rows with ``is_active=True``."""
        return self.filter(Q(is_active=True))

    def public(self):
        """Keep only rows with ``is_public=True``."""
        return self.filter(Q(is_public=True))

    def processed(self):
        """Keep only rows whose status is ``'completed'``."""
        return self.by_status('completed')

    def failed(self):
        """Keep only rows whose status is ``'failed'``."""
        return self.by_status('failed')

    def outdated(self):
        """
        Keep rows that need reprocessing.

        A row qualifies when its status is ``'outdated'`` or when its
        ``source_updated_at`` is later than its ``processed_at``.
        """
        flagged_outdated = Q(status='outdated')
        source_is_newer = Q(source_updated_at__gt=models.F('processed_at'))
        return self.filter(flagged_outdated | source_is_newer)

    def by_source_type(self, source_type: str):
        """Keep rows with the given ``source_type``."""
        return self.filter(Q(source_type=source_type))

    def by_status(self, status: str):
        """Keep rows with the given ``status``."""
        return self.filter(status=status)

    def get_processing_statistics(self):
        """
        Count the rows of this queryset per processing status.

        Returns:
            Dictionary with the keys ``total_external_data``,
            ``pending_processing``, ``currently_processing``,
            ``completed_processing`` and ``failed_processing``.
        """
        tracked_statuses = ('pending', 'processing', 'completed', 'failed')
        per_status = {
            name: Count('id', filter=Q(status=name))
            for name in tracked_statuses
        }
        counts = self.aggregate(total=Count('id'), **per_status)

        return {
            'total_external_data': counts['total'],
            'pending_processing': counts['pending'],
            'currently_processing': counts['processing'],
            'completed_processing': counts['completed'],
            'failed_processing': counts['failed'],
        }

    def by_category(self, category):
        """Keep rows with the given ``category``."""
        return self.filter(Q(category=category))

    def with_tags(self, tags: List[str]):
        """
        Keep rows whose ``tags`` contain at least one of the given tags.

        An empty or missing ``tags`` argument returns the queryset unchanged.
        """
        if not tags:
            return self

        any_tag = Q()
        for tag in tags:
            any_tag = any_tag | Q(tags__contains=[tag])
        return self.filter(any_tag)

    def search_content(self, query: str):
        """Case-insensitive substring search over title, description and content."""
        in_title = Q(title__icontains=query)
        in_description = Q(description__icontains=query)
        in_content = Q(content__icontains=query)
        return self.filter(in_title | in_description | in_content)

    def recent(self, days: int = 7):
        """Keep rows processed within the last ``days`` days."""
        threshold = timezone.now() - timedelta(days=days)
        return self.filter(processed_at__gte=threshold)

    def with_chunks(self):
        """Keep rows whose ``total_chunks`` is greater than zero."""
        return self.filter(Q(total_chunks__gt=0))

    def without_chunks(self):
        """Keep rows whose ``total_chunks`` is exactly zero."""
        return self.filter(Q(total_chunks=0))

    def expensive(self, min_cost: float = 0.01):
        """Keep rows whose ``processing_cost`` is at least ``min_cost``."""
        return self.filter(Q(processing_cost__gte=min_cost))

    def with_statistics(self):
        """
        Annotate each row with aggregates over its related ``chunks``.

        Adds ``chunks_count``, ``avg_chunk_tokens`` and
        ``total_embedding_cost``.
        """
        chunk_aggregates = {
            'chunks_count': Count('chunks'),
            'avg_chunk_tokens': Avg('chunks__token_count'),
            'total_embedding_cost': Sum('chunks__embedding_cost'),
        }
        return self.annotate(**chunk_aggregates)
|
114
|
+
|
115
|
+
|
116
|
+
class ExternalDataManager(BaseKnowbaseManager):
    """
    Manager for ExternalData.

    Exposes the ExternalDataQuerySet filters directly on the manager and adds
    creation helpers, bulk status updates, statistics, cleanup and
    regeneration of processed data.
    """

    def get_queryset(self):
        """Return an ExternalDataQuerySet bound to this manager's model and database."""
        return ExternalDataQuerySet(self.model, using=self._db)

    # --- Thin proxies to ExternalDataQuerySet ---------------------------------

    def active(self):
        """Get active external data sources."""
        return self.get_queryset().active()

    def public(self):
        """Get public external data sources."""
        return self.get_queryset().public()

    def processed(self):
        """Get successfully processed external data."""
        return self.get_queryset().processed()

    def failed(self):
        """Get failed external data."""
        return self.get_queryset().failed()

    def outdated(self):
        """Get outdated external data that needs reprocessing."""
        return self.get_queryset().outdated()

    def by_source_type(self, source_type: str):
        """Get external data by source type."""
        return self.get_queryset().by_source_type(source_type)

    def by_status(self, status: str):
        """Get external data by status."""
        return self.get_queryset().by_status(status)

    def by_category(self, category):
        """Get external data by category."""
        return self.get_queryset().by_category(category)

    def with_tags(self, tags: List[str]):
        """Get external data that contains any of the specified tags."""
        return self.get_queryset().with_tags(tags)

    def search_content(self, query: str):
        """Search external data by title, description and content."""
        return self.get_queryset().search_content(query)

    def recent(self, days: int = 7):
        """Get external data processed within the last ``days`` days."""
        return self.get_queryset().recent(days)

    def with_chunks(self):
        """Get external data that has chunks."""
        return self.get_queryset().with_chunks()

    def without_chunks(self):
        """Get external data without chunks."""
        return self.get_queryset().without_chunks()

    def expensive(self, min_cost: float = 0.01):
        """Get external data whose processing cost is at least ``min_cost``."""
        return self.get_queryset().expensive(min_cost)

    def with_statistics(self):
        """Get external data annotated with chunk and cost statistics."""
        return self.get_queryset().with_statistics()

    # --- Creation helpers -----------------------------------------------------

    def create_from_source(
        self,
        user,
        title: str,
        source_type: str,
        source_identifier: str,
        content: str,
        source_config: Optional[Dict[str, Any]] = None,
        metadata: Optional[Dict[str, Any]] = None,
        **kwargs
    ):
        """
        Create external data from a source.

        Args:
            user: User creating the external data
            title: Human-readable title
            source_type: Type of source (model, api, etc.)
            source_identifier: Unique identifier for the source
            content: Extracted content
            source_config: Configuration for data extraction; ``None`` is
                stored as an empty dict
            metadata: Additional metadata; ``None`` is stored as an empty dict
            **kwargs: Additional fields passed through to ``create()``

        Returns:
            ExternalData instance
        """
        return self.create(
            user=user,
            title=title,
            source_type=source_type,
            source_identifier=source_identifier,
            content=content,
            source_config=source_config or {},
            metadata=metadata or {},
            **kwargs
        )

    def get_or_create_from_source(
        self,
        user,
        source_identifier: str,
        defaults: Optional[Dict[str, Any]] = None
    ):
        """
        Get or create external data for a source identifier.

        The lookup is performed on ``user`` and ``source_identifier``.

        Args:
            user: User
            source_identifier: Unique identifier for the source
            defaults: Default values for creation

        Returns:
            Tuple of (ExternalData, created)
        """
        return self.get_or_create(
            user=user,
            source_identifier=source_identifier,
            defaults=defaults or {}
        )

    # --- Bulk operations and reporting ----------------------------------------

    def bulk_update_status(self, external_data_ids: List[str], status: str):
        """
        Bulk update status for multiple external data sources.

        ``updated_at`` is set explicitly to the current time as part of the
        same update.

        Args:
            external_data_ids: List of external data IDs
            status: New status

        Returns:
            Number of updated records
        """
        return self.filter(id__in=external_data_ids).update(
            status=status,
            updated_at=timezone.now()
        )

    def get_processing_statistics(self, user=None) -> Dict[str, Any]:
        """
        Get processing statistics for external data.

        Args:
            user: Optional user filter

        Returns:
            Dictionary with per-status counts (``total_count``,
            ``processed_count``, ``failed_count``, ``pending_count``,
            ``outdated_count``), totals (``total_chunks``, ``total_tokens``,
            ``total_cost``), ``avg_chunk_size`` and the matching
            ``*_percentage`` values. The Sum/Avg entries are ``None`` when no
            rows match.
        """
        queryset = self.get_queryset()
        if user:
            queryset = queryset.filter(user=user)

        stats = queryset.aggregate(
            total_count=Count('id'),
            processed_count=Count('id', filter=Q(status='completed')),
            failed_count=Count('id', filter=Q(status='failed')),
            pending_count=Count('id', filter=Q(status='pending')),
            outdated_count=Count('id', filter=Q(status='outdated')),
            total_chunks=Sum('total_chunks'),
            total_tokens=Sum('total_tokens'),
            total_cost=Sum('processing_cost'),
            avg_chunk_size=Avg('chunk_size'),
        )

        # Calculate percentages; guard against division by zero on empty sets
        total = stats['total_count'] or 0
        if total > 0:
            stats['processed_percentage'] = (stats['processed_count'] or 0) / total * 100
            stats['failed_percentage'] = (stats['failed_count'] or 0) / total * 100
            stats['pending_percentage'] = (stats['pending_count'] or 0) / total * 100
            stats['outdated_percentage'] = (stats['outdated_count'] or 0) / total * 100
        else:
            stats['processed_percentage'] = 0
            stats['failed_percentage'] = 0
            stats['pending_percentage'] = 0
            stats['outdated_percentage'] = 0

        return stats

    def cleanup_failed(self, older_than_days: int = 7) -> int:
        """
        Clean up old failed external data sources.

        Args:
            older_than_days: Remove failed sources whose ``updated_at`` is
                older than this many days

        Returns:
            Number of deleted ExternalData records (rows removed by cascade
            from related models are not counted)
        """
        cutoff = timezone.now() - timedelta(days=older_than_days)
        failed_queryset = self.failed().filter(updated_at__lt=cutoff)

        # Take the count from delete() itself instead of a separate count()
        # query, so the reported number is exactly what was removed even if
        # rows change in between. delete() returns (total, {model_label: n}).
        _, deleted_per_model = failed_queryset.delete()
        return deleted_per_model.get(self.model._meta.label, 0)

    def regenerate_external_data(self, external_data_ids: List[str]) -> Dict[str, Any]:
        """
        Regenerate embeddings for specified external data sources.

        For every matching record the processing state is reset to pending,
        its existing chunks are deleted and a reprocessing task is queued.
        A failure on one record is collected in ``errors`` and does not stop
        the remaining records.

        Args:
            external_data_ids: List of external data IDs to regenerate

        Returns:
            Dictionary with the keys ``success``, ``regenerated_count``,
            ``failed_count``, ``total_count`` and ``errors``. When no record
            matches the given IDs, ``success`` is ``False`` and an additional
            ``error`` key carries the reason.
        """
        # Imported lazily, as in the rest of this method's dependencies;
        # presumably to avoid import cycles with models/tasks - TODO confirm.
        from ..models.external_data import ExternalDataStatus
        from ..tasks.external_data_tasks import process_external_data_async

        external_data_list = list(self.get_queryset().filter(id__in=external_data_ids))

        if not external_data_list:
            not_found_message = 'No external data found with provided IDs'
            # Same key set as the regular result below, plus the legacy
            # 'error' key, so callers can read 'total_count'/'errors' safely.
            return {
                'success': False,
                'error': not_found_message,
                'regenerated_count': 0,
                'failed_count': 0,
                'total_count': 0,
                'errors': [not_found_message],
            }

        regenerated_count = 0
        failed_count = 0
        errors = []

        for external_data in external_data_list:
            try:
                # Reset processing state
                external_data.status = ExternalDataStatus.PENDING
                external_data.processing_error = ""
                external_data.processed_at = None
                external_data.total_chunks = 0
                external_data.total_tokens = 0
                external_data.processing_cost = 0.0
                external_data.save(update_fields=[
                    'status', 'processing_error', 'processed_at',
                    'total_chunks', 'total_tokens', 'processing_cost'
                ])

                # Clear existing chunks
                external_data.chunks.all().delete()

                # Queue for reprocessing with force flag
                process_external_data_async.send(
                    str(external_data.id),
                    force_reprocess=True
                )

                regenerated_count += 1

            except Exception as e:
                # Deliberate best-effort: record the failure and continue
                # with the remaining records.
                failed_count += 1
                errors.append(f"Failed to regenerate {external_data.title}: {str(e)}")

        return {
            'success': regenerated_count > 0,
            'regenerated_count': regenerated_count,
            'failed_count': failed_count,
            'total_count': len(external_data_list),
            'errors': errors
        }
|
379
|
+
|
380
|
+
|
381
|
+
class ExternalDataChunkQuerySet(models.QuerySet):
    """Chainable filters for ExternalDataChunk rows."""

    def by_external_data(self, external_data):
        """Keep chunks that belong to the given external data."""
        return self.filter(Q(external_data=external_data))

    def by_embedding_model(self, model: str):
        """Keep chunks whose ``embedding_model`` equals ``model``."""
        return self.filter(Q(embedding_model=model))

    def with_embeddings(self):
        """Keep chunks whose ``embedding`` is not null."""
        has_embedding = Q(embedding__isnull=False)
        return self.filter(has_embedding)

    def without_embeddings(self):
        """Keep chunks whose ``embedding`` is null."""
        lacks_embedding = Q(embedding__isnull=True)
        return self.filter(lacks_embedding)

    def large_chunks(self, min_tokens: int = 500):
        """Keep chunks with a ``token_count`` of at least ``min_tokens``."""
        return self.filter(Q(token_count__gte=min_tokens))

    def small_chunks(self, max_tokens: int = 100):
        """Keep chunks with a ``token_count`` of at most ``max_tokens``."""
        return self.filter(Q(token_count__lte=max_tokens))

    def expensive_chunks(self, min_cost: float = 0.001):
        """Keep chunks with an ``embedding_cost`` of at least ``min_cost``."""
        return self.filter(Q(embedding_cost__gte=min_cost))
|
411
|
+
|
412
|
+
|
413
|
+
class ExternalDataChunkManager(models.Manager):
    """Manager exposing the ExternalDataChunkQuerySet filters plus chunk statistics."""

    def get_queryset(self):
        """Return an ExternalDataChunkQuerySet bound to this manager's model and database."""
        chunk_queryset = ExternalDataChunkQuerySet(self.model, using=self._db)
        return chunk_queryset

    def by_external_data(self, external_data):
        """Chunks that belong to the given external data."""
        base = self.get_queryset()
        return base.by_external_data(external_data)

    def by_embedding_model(self, model: str):
        """Chunks embedded with the given embedding model."""
        base = self.get_queryset()
        return base.by_embedding_model(model)

    def with_embeddings(self):
        """Chunks that have an embedding."""
        base = self.get_queryset()
        return base.with_embeddings()

    def without_embeddings(self):
        """Chunks that have no embedding."""
        base = self.get_queryset()
        return base.without_embeddings()

    def large_chunks(self, min_tokens: int = 500):
        """Chunks with at least ``min_tokens`` tokens."""
        base = self.get_queryset()
        return base.large_chunks(min_tokens)

    def small_chunks(self, max_tokens: int = 100):
        """Chunks with at most ``max_tokens`` tokens."""
        base = self.get_queryset()
        return base.small_chunks(max_tokens)

    def expensive_chunks(self, min_cost: float = 0.001):
        """Chunks whose embedding cost is at least ``min_cost``."""
        base = self.get_queryset()
        return base.expensive_chunks(min_cost)

    def get_chunk_statistics(self, user=None) -> Dict[str, Any]:
        """
        Aggregate token, character and cost figures over chunks.

        Args:
            user: When truthy, only chunks filtered by ``user=user`` are
                aggregated

        Returns:
            Dictionary with ``total_chunks``, ``total_tokens``,
            ``total_characters``, ``total_cost``, ``avg_tokens``,
            ``avg_characters``, ``avg_cost``, ``max_tokens`` and
            ``min_tokens``
        """
        chunks = self.get_queryset()
        if user:
            chunks = chunks.filter(user=user)

        aggregates = {
            'total_chunks': Count('id'),
            'total_tokens': Sum('token_count'),
            'total_characters': Sum('character_count'),
            'total_cost': Sum('embedding_cost'),
            'avg_tokens': Avg('token_count'),
            'avg_characters': Avg('character_count'),
            'avg_cost': Avg('embedding_cost'),
            'max_tokens': models.Max('token_count'),
            'min_tokens': models.Min('token_count'),
        }
        return chunks.aggregate(**aggregates)
|