django-cfg 1.1.82__py3-none-any.whl → 1.2.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (244) hide show
  1. django_cfg/__init__.py +20 -448
  2. django_cfg/apps/accounts/README.md +3 -3
  3. django_cfg/apps/accounts/admin/__init__.py +0 -2
  4. django_cfg/apps/accounts/admin/activity.py +2 -9
  5. django_cfg/apps/accounts/admin/filters.py +0 -42
  6. django_cfg/apps/accounts/admin/inlines.py +8 -8
  7. django_cfg/apps/accounts/admin/otp.py +5 -5
  8. django_cfg/apps/accounts/admin/registration_source.py +1 -8
  9. django_cfg/apps/accounts/admin/user.py +12 -20
  10. django_cfg/apps/accounts/managers/user_manager.py +2 -129
  11. django_cfg/apps/accounts/migrations/0006_remove_twilioresponse_otp_secret_and_more.py +46 -0
  12. django_cfg/apps/accounts/models.py +3 -123
  13. django_cfg/apps/accounts/serializers/otp.py +40 -44
  14. django_cfg/apps/accounts/serializers/profile.py +0 -2
  15. django_cfg/apps/accounts/services/otp_service.py +98 -186
  16. django_cfg/apps/accounts/signals.py +25 -15
  17. django_cfg/apps/accounts/utils/auth_email_service.py +84 -0
  18. django_cfg/apps/accounts/views/otp.py +35 -36
  19. django_cfg/apps/agents/README.md +129 -0
  20. django_cfg/apps/agents/__init__.py +68 -0
  21. django_cfg/apps/agents/admin/__init__.py +17 -0
  22. django_cfg/apps/agents/admin/execution_admin.py +460 -0
  23. django_cfg/apps/agents/admin/registry_admin.py +360 -0
  24. django_cfg/apps/agents/admin/toolsets_admin.py +482 -0
  25. django_cfg/apps/agents/apps.py +29 -0
  26. django_cfg/apps/agents/core/__init__.py +20 -0
  27. django_cfg/apps/agents/core/agent.py +281 -0
  28. django_cfg/apps/agents/core/dependencies.py +154 -0
  29. django_cfg/apps/agents/core/exceptions.py +66 -0
  30. django_cfg/apps/agents/core/models.py +106 -0
  31. django_cfg/apps/agents/core/orchestrator.py +391 -0
  32. django_cfg/apps/agents/examples/__init__.py +3 -0
  33. django_cfg/apps/agents/examples/simple_example.py +161 -0
  34. django_cfg/apps/agents/integration/__init__.py +14 -0
  35. django_cfg/apps/agents/integration/middleware.py +80 -0
  36. django_cfg/apps/agents/integration/registry.py +345 -0
  37. django_cfg/apps/agents/integration/signals.py +50 -0
  38. django_cfg/apps/agents/management/__init__.py +3 -0
  39. django_cfg/apps/agents/management/commands/__init__.py +3 -0
  40. django_cfg/apps/agents/management/commands/create_agent.py +365 -0
  41. django_cfg/apps/agents/management/commands/orchestrator_status.py +191 -0
  42. django_cfg/apps/agents/managers/__init__.py +23 -0
  43. django_cfg/apps/agents/managers/execution.py +236 -0
  44. django_cfg/apps/agents/managers/registry.py +254 -0
  45. django_cfg/apps/agents/managers/toolsets.py +496 -0
  46. django_cfg/apps/agents/migrations/0001_initial.py +286 -0
  47. django_cfg/apps/agents/migrations/__init__.py +5 -0
  48. django_cfg/apps/agents/models/__init__.py +15 -0
  49. django_cfg/apps/agents/models/execution.py +215 -0
  50. django_cfg/apps/agents/models/registry.py +220 -0
  51. django_cfg/apps/agents/models/toolsets.py +305 -0
  52. django_cfg/apps/agents/patterns/__init__.py +24 -0
  53. django_cfg/apps/agents/patterns/content_agents.py +234 -0
  54. django_cfg/apps/agents/toolsets/__init__.py +15 -0
  55. django_cfg/apps/agents/toolsets/cache_toolset.py +285 -0
  56. django_cfg/apps/agents/toolsets/django_toolset.py +220 -0
  57. django_cfg/apps/agents/toolsets/file_toolset.py +324 -0
  58. django_cfg/apps/agents/toolsets/orm_toolset.py +319 -0
  59. django_cfg/apps/agents/urls.py +46 -0
  60. django_cfg/apps/knowbase/README.md +150 -0
  61. django_cfg/apps/knowbase/__init__.py +27 -0
  62. django_cfg/apps/knowbase/admin/__init__.py +23 -0
  63. django_cfg/apps/knowbase/admin/archive_admin.py +857 -0
  64. django_cfg/apps/knowbase/admin/chat_admin.py +386 -0
  65. django_cfg/apps/knowbase/admin/document_admin.py +650 -0
  66. django_cfg/apps/knowbase/admin/external_data_admin.py +685 -0
  67. django_cfg/apps/knowbase/apps.py +81 -0
  68. django_cfg/apps/knowbase/config/README.md +176 -0
  69. django_cfg/apps/knowbase/config/__init__.py +51 -0
  70. django_cfg/apps/knowbase/config/constance_fields.py +186 -0
  71. django_cfg/apps/knowbase/config/constance_settings.py +200 -0
  72. django_cfg/apps/knowbase/config/settings.py +450 -0
  73. django_cfg/apps/knowbase/examples/__init__.py +3 -0
  74. django_cfg/apps/knowbase/examples/external_data_usage.py +191 -0
  75. django_cfg/apps/knowbase/management/__init__.py +0 -0
  76. django_cfg/apps/knowbase/management/commands/__init__.py +0 -0
  77. django_cfg/apps/knowbase/management/commands/knowbase_stats.py +158 -0
  78. django_cfg/apps/knowbase/management/commands/setup_knowbase.py +59 -0
  79. django_cfg/apps/knowbase/managers/__init__.py +22 -0
  80. django_cfg/apps/knowbase/managers/archive.py +426 -0
  81. django_cfg/apps/knowbase/managers/base.py +32 -0
  82. django_cfg/apps/knowbase/managers/chat.py +141 -0
  83. django_cfg/apps/knowbase/managers/document.py +203 -0
  84. django_cfg/apps/knowbase/managers/external_data.py +471 -0
  85. django_cfg/apps/knowbase/migrations/0001_initial.py +427 -0
  86. django_cfg/apps/knowbase/migrations/0002_archiveitem_archiveitemchunk_documentarchive_and_more.py +434 -0
  87. django_cfg/apps/knowbase/migrations/__init__.py +5 -0
  88. django_cfg/apps/knowbase/mixins/__init__.py +15 -0
  89. django_cfg/apps/knowbase/mixins/config.py +108 -0
  90. django_cfg/apps/knowbase/mixins/creator.py +81 -0
  91. django_cfg/apps/knowbase/mixins/examples/vehicle_model_example.py +199 -0
  92. django_cfg/apps/knowbase/mixins/external_data_mixin.py +813 -0
  93. django_cfg/apps/knowbase/mixins/service.py +362 -0
  94. django_cfg/apps/knowbase/models/__init__.py +41 -0
  95. django_cfg/apps/knowbase/models/archive.py +599 -0
  96. django_cfg/apps/knowbase/models/base.py +58 -0
  97. django_cfg/apps/knowbase/models/chat.py +157 -0
  98. django_cfg/apps/knowbase/models/document.py +267 -0
  99. django_cfg/apps/knowbase/models/external_data.py +376 -0
  100. django_cfg/apps/knowbase/serializers/__init__.py +68 -0
  101. django_cfg/apps/knowbase/serializers/archive_serializers.py +386 -0
  102. django_cfg/apps/knowbase/serializers/chat_serializers.py +137 -0
  103. django_cfg/apps/knowbase/serializers/document_serializers.py +94 -0
  104. django_cfg/apps/knowbase/serializers/external_data_serializers.py +256 -0
  105. django_cfg/apps/knowbase/serializers/public_serializers.py +74 -0
  106. django_cfg/apps/knowbase/services/__init__.py +40 -0
  107. django_cfg/apps/knowbase/services/archive/__init__.py +42 -0
  108. django_cfg/apps/knowbase/services/archive/archive_service.py +541 -0
  109. django_cfg/apps/knowbase/services/archive/chunking_service.py +791 -0
  110. django_cfg/apps/knowbase/services/archive/exceptions.py +52 -0
  111. django_cfg/apps/knowbase/services/archive/extraction_service.py +508 -0
  112. django_cfg/apps/knowbase/services/archive/vectorization_service.py +362 -0
  113. django_cfg/apps/knowbase/services/base.py +53 -0
  114. django_cfg/apps/knowbase/services/chat_service.py +239 -0
  115. django_cfg/apps/knowbase/services/document_service.py +144 -0
  116. django_cfg/apps/knowbase/services/embedding/__init__.py +43 -0
  117. django_cfg/apps/knowbase/services/embedding/async_processor.py +244 -0
  118. django_cfg/apps/knowbase/services/embedding/batch_processor.py +250 -0
  119. django_cfg/apps/knowbase/services/embedding/batch_result.py +61 -0
  120. django_cfg/apps/knowbase/services/embedding/models.py +229 -0
  121. django_cfg/apps/knowbase/services/embedding/processors.py +148 -0
  122. django_cfg/apps/knowbase/services/embedding/utils.py +176 -0
  123. django_cfg/apps/knowbase/services/prompt_builder.py +191 -0
  124. django_cfg/apps/knowbase/services/search_service.py +293 -0
  125. django_cfg/apps/knowbase/signals/__init__.py +21 -0
  126. django_cfg/apps/knowbase/signals/archive_signals.py +211 -0
  127. django_cfg/apps/knowbase/signals/chat_signals.py +37 -0
  128. django_cfg/apps/knowbase/signals/document_signals.py +143 -0
  129. django_cfg/apps/knowbase/signals/external_data_signals.py +157 -0
  130. django_cfg/apps/knowbase/tasks/__init__.py +39 -0
  131. django_cfg/apps/knowbase/tasks/archive_tasks.py +316 -0
  132. django_cfg/apps/knowbase/tasks/document_processing.py +341 -0
  133. django_cfg/apps/knowbase/tasks/external_data_tasks.py +341 -0
  134. django_cfg/apps/knowbase/tasks/maintenance.py +195 -0
  135. django_cfg/apps/knowbase/urls.py +43 -0
  136. django_cfg/apps/knowbase/utils/__init__.py +12 -0
  137. django_cfg/apps/knowbase/utils/chunk_settings.py +261 -0
  138. django_cfg/apps/knowbase/utils/text_processing.py +375 -0
  139. django_cfg/apps/knowbase/utils/validation.py +99 -0
  140. django_cfg/apps/knowbase/views/__init__.py +28 -0
  141. django_cfg/apps/knowbase/views/archive_views.py +469 -0
  142. django_cfg/apps/knowbase/views/base.py +49 -0
  143. django_cfg/apps/knowbase/views/chat_views.py +181 -0
  144. django_cfg/apps/knowbase/views/document_views.py +183 -0
  145. django_cfg/apps/knowbase/views/public_views.py +129 -0
  146. django_cfg/apps/leads/admin.py +70 -0
  147. django_cfg/apps/newsletter/admin.py +234 -0
  148. django_cfg/apps/newsletter/admin_filters.py +124 -0
  149. django_cfg/apps/support/admin.py +196 -0
  150. django_cfg/apps/support/admin_filters.py +71 -0
  151. django_cfg/apps/support/templates/support/chat/ticket_chat.html +1 -1
  152. django_cfg/apps/urls.py +5 -4
  153. django_cfg/cli/README.md +1 -1
  154. django_cfg/cli/commands/create_project.py +2 -2
  155. django_cfg/cli/commands/info.py +1 -1
  156. django_cfg/config.py +44 -0
  157. django_cfg/core/config.py +29 -82
  158. django_cfg/core/environment.py +1 -1
  159. django_cfg/core/generation.py +19 -107
  160. django_cfg/{integration.py → core/integration.py} +18 -16
  161. django_cfg/core/validation.py +1 -1
  162. django_cfg/management/__init__.py +1 -1
  163. django_cfg/management/commands/__init__.py +1 -1
  164. django_cfg/management/commands/auto_generate.py +482 -0
  165. django_cfg/management/commands/migrator.py +19 -101
  166. django_cfg/management/commands/test_email.py +1 -1
  167. django_cfg/middleware/README.md +0 -158
  168. django_cfg/middleware/__init__.py +0 -2
  169. django_cfg/middleware/user_activity.py +3 -3
  170. django_cfg/models/api.py +145 -0
  171. django_cfg/models/base.py +287 -0
  172. django_cfg/models/cache.py +4 -4
  173. django_cfg/models/constance.py +25 -88
  174. django_cfg/models/database.py +9 -9
  175. django_cfg/models/drf.py +3 -36
  176. django_cfg/models/email.py +163 -0
  177. django_cfg/models/environment.py +276 -0
  178. django_cfg/models/limits.py +1 -1
  179. django_cfg/models/logging.py +366 -0
  180. django_cfg/models/revolution.py +41 -2
  181. django_cfg/models/security.py +125 -0
  182. django_cfg/models/services.py +1 -1
  183. django_cfg/modules/__init__.py +2 -56
  184. django_cfg/modules/base.py +78 -52
  185. django_cfg/modules/django_currency/service.py +2 -2
  186. django_cfg/modules/django_email.py +2 -2
  187. django_cfg/modules/django_health.py +267 -0
  188. django_cfg/modules/django_llm/llm/client.py +91 -19
  189. django_cfg/modules/django_llm/translator/translator.py +2 -2
  190. django_cfg/modules/django_logger.py +2 -2
  191. django_cfg/modules/django_ngrok.py +2 -2
  192. django_cfg/modules/django_tasks.py +68 -3
  193. django_cfg/modules/django_telegram.py +3 -3
  194. django_cfg/modules/django_twilio/sendgrid_service.py +2 -2
  195. django_cfg/modules/django_twilio/service.py +2 -2
  196. django_cfg/modules/django_twilio/simple_service.py +2 -2
  197. django_cfg/modules/django_twilio/twilio_service.py +2 -2
  198. django_cfg/modules/django_unfold/__init__.py +69 -0
  199. django_cfg/modules/{unfold → django_unfold}/callbacks.py +23 -22
  200. django_cfg/modules/django_unfold/dashboard.py +278 -0
  201. django_cfg/modules/django_unfold/icons/README.md +145 -0
  202. django_cfg/modules/django_unfold/icons/__init__.py +12 -0
  203. django_cfg/modules/django_unfold/icons/constants.py +2851 -0
  204. django_cfg/modules/django_unfold/icons/generate_icons.py +486 -0
  205. django_cfg/modules/django_unfold/models/__init__.py +42 -0
  206. django_cfg/modules/django_unfold/models/config.py +601 -0
  207. django_cfg/modules/django_unfold/models/dashboard.py +206 -0
  208. django_cfg/modules/django_unfold/models/dropdown.py +40 -0
  209. django_cfg/modules/django_unfold/models/navigation.py +73 -0
  210. django_cfg/modules/django_unfold/models/tabs.py +25 -0
  211. django_cfg/modules/{unfold → django_unfold}/system_monitor.py +2 -2
  212. django_cfg/modules/django_unfold/utils.py +140 -0
  213. django_cfg/registry/__init__.py +23 -0
  214. django_cfg/registry/core.py +61 -0
  215. django_cfg/registry/exceptions.py +11 -0
  216. django_cfg/registry/modules.py +12 -0
  217. django_cfg/registry/services.py +26 -0
  218. django_cfg/registry/third_party.py +52 -0
  219. django_cfg/routing/__init__.py +19 -0
  220. django_cfg/routing/callbacks.py +198 -0
  221. django_cfg/routing/routers.py +48 -0
  222. django_cfg/templates/admin/layouts/dashboard_with_tabs.html +8 -9
  223. django_cfg/templatetags/__init__.py +0 -0
  224. django_cfg/templatetags/django_cfg.py +33 -0
  225. django_cfg/urls.py +33 -0
  226. django_cfg/utils/path_resolution.py +1 -1
  227. django_cfg/utils/smart_defaults.py +7 -61
  228. django_cfg/utils/toolkit.py +663 -0
  229. {django_cfg-1.1.82.dist-info → django_cfg-1.2.1.dist-info}/METADATA +83 -86
  230. django_cfg-1.2.1.dist-info/RECORD +441 -0
  231. django_cfg/archive/django_sample.zip +0 -0
  232. django_cfg/models/unfold.py +0 -271
  233. django_cfg/modules/unfold/__init__.py +0 -29
  234. django_cfg/modules/unfold/dashboard.py +0 -318
  235. django_cfg/pyproject.toml +0 -370
  236. django_cfg/routers.py +0 -83
  237. django_cfg-1.1.82.dist-info/RECORD +0 -278
  238. /django_cfg/{exceptions.py → core/exceptions.py} +0 -0
  239. /django_cfg/modules/{unfold → django_unfold}/models.py +0 -0
  240. /django_cfg/modules/{unfold → django_unfold}/tailwind.py +0 -0
  241. /django_cfg/{version_check.py → utils/version_check.py} +0 -0
  242. {django_cfg-1.1.82.dist-info → django_cfg-1.2.1.dist-info}/WHEEL +0 -0
  243. {django_cfg-1.1.82.dist-info → django_cfg-1.2.1.dist-info}/entry_points.txt +0 -0
  244. {django_cfg-1.1.82.dist-info → django_cfg-1.2.1.dist-info}/licenses/LICENSE +0 -0
@@ -0,0 +1,471 @@
1
+ """
2
+ External Data managers for advanced querying and operations.
3
+ """
4
+
5
+ from django.db import models
6
+ from django.db.models import Count, Q, Avg, Sum
7
+ from django.utils import timezone
8
+ from typing import Optional, List, Dict, Any
9
+ from datetime import timedelta
10
+
11
+ from .base import BaseKnowbaseManager
12
+
13
+
14
class ExternalDataQuerySet(models.QuerySet):
    """Chainable, named filters and aggregates for ExternalData rows."""

    def active(self):
        """Keep rows whose ``is_active`` flag is True."""
        return self.filter(is_active=True)

    def public(self):
        """Keep rows whose ``is_public`` flag is True."""
        return self.filter(is_public=True)

    def processed(self):
        """Keep rows whose status is ``'completed'``."""
        return self.filter(status='completed')

    def failed(self):
        """Keep rows whose status is ``'failed'``."""
        return self.filter(status='failed')

    def outdated(self):
        """
        Keep rows that need reprocessing.

        A row qualifies when its status is ``'outdated'`` or when its
        ``source_updated_at`` is later than its ``processed_at``.
        """
        flagged_outdated = Q(status='outdated')
        source_is_newer = Q(source_updated_at__gt=models.F('processed_at'))
        return self.filter(flagged_outdated | source_is_newer)

    def by_source_type(self, source_type: str):
        """Keep rows with the given ``source_type``."""
        return self.filter(source_type=source_type)

    def by_status(self, status: str):
        """Keep rows with the given ``status``."""
        return self.filter(status=status)

    def get_processing_statistics(self):
        """
        Count the rows of this queryset, overall and per processing status.

        Returns:
            Dictionary with the keys ``total_external_data``,
            ``pending_processing``, ``currently_processing``,
            ``completed_processing`` and ``failed_processing``.
        """
        # One conditional COUNT per status, all evaluated in a single query.
        aggregates = {'total': Count('id')}
        for status_name in ('pending', 'processing', 'completed', 'failed'):
            aggregates[status_name] = Count('id', filter=Q(status=status_name))

        counts = self.aggregate(**aggregates)

        return {
            'total_external_data': counts['total'],
            'pending_processing': counts['pending'],
            'currently_processing': counts['processing'],
            'completed_processing': counts['completed'],
            'failed_processing': counts['failed'],
        }

    def by_category(self, category):
        """Keep rows with the given ``category``."""
        return self.filter(category=category)

    def with_tags(self, tags: List[str]):
        """
        Keep rows whose ``tags`` contain at least one of the given tags.

        An empty or missing ``tags`` argument returns the queryset unchanged.
        """
        if not tags:
            return self

        # OR together one containment test per requested tag.
        any_tag = Q()
        for tag in tags:
            any_tag = any_tag | Q(tags__contains=[tag])
        return self.filter(any_tag)

    def search_content(self, query: str):
        """Case-insensitive substring match on title, description or content."""
        in_title = Q(title__icontains=query)
        in_description = Q(description__icontains=query)
        in_content = Q(content__icontains=query)
        return self.filter(in_title | in_description | in_content)

    def recent(self, days: int = 7):
        """Keep rows whose ``processed_at`` falls within the last ``days`` days."""
        threshold = timezone.now() - timedelta(days=days)
        return self.filter(processed_at__gte=threshold)

    def with_chunks(self):
        """Keep rows whose ``total_chunks`` is greater than zero."""
        return self.filter(total_chunks__gt=0)

    def without_chunks(self):
        """Keep rows whose ``total_chunks`` equals zero."""
        return self.filter(total_chunks=0)

    def expensive(self, min_cost: float = 0.01):
        """Keep rows whose ``processing_cost`` is at least ``min_cost``."""
        return self.filter(processing_cost__gte=min_cost)

    def with_statistics(self):
        """
        Annotate each row with aggregates over its related ``chunks``.

        Adds ``chunks_count``, ``avg_chunk_tokens`` and
        ``total_embedding_cost``.
        """
        annotations = {
            'chunks_count': Count('chunks'),
            'avg_chunk_tokens': Avg('chunks__token_count'),
            'total_embedding_cost': Sum('chunks__embedding_cost'),
        }
        return self.annotate(**annotations)
114
+
115
+
116
class ExternalDataManager(BaseKnowbaseManager):
    """
    Manager for ExternalData with user scoping and advanced queries.

    Every query starts from ``ExternalDataQuerySet``; most methods simply
    forward to the queryset method of the same name so they can be called
    directly on the manager.
    NOTE(review): user scoping is presumably provided by
    ``BaseKnowbaseManager`` - confirm against that class.
    """

    def get_queryset(self):
        """Return an ``ExternalDataQuerySet`` bound to this manager's model and DB."""
        return ExternalDataQuerySet(self.model, using=self._db)

    def active(self):
        """Get active external data sources."""
        return self.get_queryset().active()

    def public(self):
        """Get public external data sources."""
        return self.get_queryset().public()

    def processed(self):
        """Get successfully processed external data."""
        return self.get_queryset().processed()

    def failed(self):
        """Get failed external data."""
        return self.get_queryset().failed()

    def outdated(self):
        """Get outdated external data that needs reprocessing."""
        return self.get_queryset().outdated()

    def by_source_type(self, source_type: str):
        """Get external data by source type."""
        return self.get_queryset().by_source_type(source_type)

    def by_status(self, status: str):
        """Get external data by status."""
        return self.get_queryset().by_status(status)

    def by_category(self, category):
        """Get external data by category."""
        return self.get_queryset().by_category(category)

    def with_tags(self, tags: List[str]):
        """Get external data with any of the specified tags."""
        return self.get_queryset().with_tags(tags)

    def search_content(self, query: str):
        """Search external data title, description and content."""
        return self.get_queryset().search_content(query)

    def recent(self, days: int = 7):
        """Get external data processed within the last ``days`` days."""
        return self.get_queryset().recent(days)

    def with_chunks(self):
        """Get external data that has chunks."""
        return self.get_queryset().with_chunks()

    def without_chunks(self):
        """Get external data without chunks."""
        return self.get_queryset().without_chunks()

    def expensive(self, min_cost: float = 0.01):
        """Get external data whose processing cost is at least ``min_cost``."""
        return self.get_queryset().expensive(min_cost)

    def with_statistics(self):
        """Get external data annotated with chunk statistics."""
        return self.get_queryset().with_statistics()

    def create_from_source(
        self,
        user,
        title: str,
        source_type: str,
        source_identifier: str,
        content: str,
        source_config: Optional[Dict[str, Any]] = None,
        metadata: Optional[Dict[str, Any]] = None,
        **kwargs
    ):
        """
        Create external data from a source.

        Args:
            user: User creating the external data
            title: Human-readable title
            source_type: Type of source (model, api, etc.)
            source_identifier: Unique identifier for the source
            content: Extracted content
            source_config: Configuration for data extraction
            metadata: Additional metadata
            **kwargs: Additional fields

        Returns:
            ExternalData instance
        """
        return self.create(
            user=user,
            title=title,
            source_type=source_type,
            source_identifier=source_identifier,
            content=content,
            # A fresh dict per call; falsy values are replaced by {}.
            source_config=source_config or {},
            metadata=metadata or {},
            **kwargs
        )

    def get_or_create_from_source(
        self,
        user,
        source_identifier: str,
        defaults: Optional[Dict[str, Any]] = None
    ):
        """
        Get or create external data for a source identifier.

        Args:
            user: User
            source_identifier: Unique identifier for the source
            defaults: Default values for creation

        Returns:
            Tuple of (ExternalData, created)
        """
        return self.get_or_create(
            user=user,
            source_identifier=source_identifier,
            defaults=defaults or {}
        )

    def bulk_update_status(self, external_data_ids: List[str], status: str):
        """
        Bulk update status for multiple external data sources.

        Args:
            external_data_ids: List of external data IDs
            status: New status

        Returns:
            Number of updated records
        """
        # A queryset update() bypasses save(), so updated_at is set by hand.
        return self.filter(id__in=external_data_ids).update(
            status=status,
            updated_at=timezone.now()
        )

    def get_processing_statistics(self, user=None) -> Dict[str, Any]:
        """
        Get processing statistics for external data.

        Args:
            user: Optional user filter

        Returns:
            Dictionary with per-status counts, summed chunk/token/cost
            totals, the average chunk size and per-status percentages.
            The totals and percentages are 0 when there are no rows;
            ``avg_chunk_size`` stays ``None`` in that case.
        """
        queryset = self.get_queryset()
        if user:
            queryset = queryset.filter(user=user)

        stats = queryset.aggregate(
            total_count=Count('id'),
            processed_count=Count('id', filter=Q(status='completed')),
            failed_count=Count('id', filter=Q(status='failed')),
            pending_count=Count('id', filter=Q(status='pending')),
            outdated_count=Count('id', filter=Q(status='outdated')),
            total_chunks=Sum('total_chunks'),
            total_tokens=Sum('total_tokens'),
            total_cost=Sum('processing_cost'),
            avg_chunk_size=Avg('chunk_size'),
        )

        # Sum() yields None over an empty set; report 0 so the totals are
        # numeric, in line with the zeroed percentages below.
        for sum_key in ('total_chunks', 'total_tokens', 'total_cost'):
            if stats[sum_key] is None:
                stats[sum_key] = 0

        # Calculate percentages
        total = stats['total_count'] or 0
        for name in ('processed', 'failed', 'pending', 'outdated'):
            if total > 0:
                count = stats[f'{name}_count'] or 0
                stats[f'{name}_percentage'] = count / total * 100
            else:
                stats[f'{name}_percentage'] = 0

        return stats

    def cleanup_failed(self, older_than_days: int = 7) -> int:
        """
        Clean up old failed external data sources.

        Args:
            older_than_days: Remove failed sources older than this many days

        Returns:
            Number of deleted records
        """
        cutoff = timezone.now() - timedelta(days=older_than_days)
        stale_failures = self.failed().filter(updated_at__lt=cutoff)

        # delete() returns (total, {model_label: count}). Taking this model's
        # entry reports exactly what was removed, rather than a separate
        # count() that can drift from the delete. The grand total is not
        # used because it also includes cascaded related rows.
        _, deleted_per_model = stale_failures.delete()
        return deleted_per_model.get(self.model._meta.label, 0)

    def regenerate_external_data(self, external_data_ids: List[str]) -> Dict[str, Any]:
        """
        Regenerate embeddings for specified external data sources.

        For each matching record the processing state is reset, its existing
        chunks are deleted and a reprocessing task is queued. A failure on
        one record is recorded and does not stop the remaining records.

        Args:
            external_data_ids: List of external data IDs to regenerate

        Returns:
            Dictionary with ``success``, ``regenerated_count``,
            ``failed_count``, ``total_count`` and ``errors``; when no record
            matches, an ``error`` message is included as well.
        """
        # Imported here rather than at module level, presumably to avoid a
        # circular import between managers, models and tasks.
        from ..models.external_data import ExternalDataStatus
        from ..tasks.external_data_tasks import process_external_data_async

        external_data_list = list(self.get_queryset().filter(id__in=external_data_ids))

        if not external_data_list:
            # Same keys as the normal result so callers can read
            # 'total_count' and 'errors' on either path.
            return {
                'success': False,
                'error': 'No external data found with provided IDs',
                'regenerated_count': 0,
                'failed_count': 0,
                'total_count': 0,
                'errors': []
            }

        regenerated_count = 0
        failed_count = 0
        errors = []

        for external_data in external_data_list:
            try:
                # Reset processing state
                external_data.status = ExternalDataStatus.PENDING
                external_data.processing_error = ""
                external_data.processed_at = None
                external_data.total_chunks = 0
                external_data.total_tokens = 0
                external_data.processing_cost = 0.0
                external_data.save(update_fields=[
                    'status', 'processing_error', 'processed_at',
                    'total_chunks', 'total_tokens', 'processing_cost'
                ])

                # Clear existing chunks
                external_data.chunks.all().delete()

                # Queue for reprocessing with force flag
                process_external_data_async.send(
                    str(external_data.id),
                    force_reprocess=True
                )

                regenerated_count += 1

            except Exception as e:
                # Deliberate best-effort: note the failure and carry on.
                failed_count += 1
                errors.append(f"Failed to regenerate {external_data.title}: {str(e)}")

        return {
            'success': regenerated_count > 0,
            'regenerated_count': regenerated_count,
            'failed_count': failed_count,
            'total_count': len(external_data_list),
            'errors': errors
        }
379
+
380
+
381
class ExternalDataChunkQuerySet(models.QuerySet):
    """Chainable, named filters for ExternalDataChunk rows."""

    def by_external_data(self, external_data):
        """Keep chunks whose ``external_data`` matches the given value."""
        return self.filter(external_data=external_data)

    def by_embedding_model(self, model: str):
        """Keep chunks whose ``embedding_model`` equals ``model``."""
        return self.filter(embedding_model=model)

    def with_embeddings(self):
        """Keep chunks whose ``embedding`` is not NULL."""
        has_embedding = Q(embedding__isnull=False)
        return self.filter(has_embedding)

    def without_embeddings(self):
        """Keep chunks whose ``embedding`` is NULL."""
        lacks_embedding = Q(embedding__isnull=True)
        return self.filter(lacks_embedding)

    def large_chunks(self, min_tokens: int = 500):
        """Keep chunks with a ``token_count`` of at least ``min_tokens``."""
        return self.filter(token_count__gte=min_tokens)

    def small_chunks(self, max_tokens: int = 100):
        """Keep chunks with a ``token_count`` of at most ``max_tokens``."""
        return self.filter(token_count__lte=max_tokens)

    def expensive_chunks(self, min_cost: float = 0.001):
        """Keep chunks with an ``embedding_cost`` of at least ``min_cost``."""
        return self.filter(embedding_cost__gte=min_cost)
411
+
412
+
413
class ExternalDataChunkManager(models.Manager):
    """Manager exposing the ExternalDataChunkQuerySet filters plus aggregates."""

    def get_queryset(self):
        """Return an ``ExternalDataChunkQuerySet`` bound to this manager's model and DB."""
        return ExternalDataChunkQuerySet(self.model, using=self._db)

    def by_external_data(self, external_data):
        """Chunks belonging to the given external data."""
        chunks = self.get_queryset()
        return chunks.by_external_data(external_data)

    def by_embedding_model(self, model: str):
        """Chunks produced with the given embedding model."""
        chunks = self.get_queryset()
        return chunks.by_embedding_model(model)

    def with_embeddings(self):
        """Chunks that have an embedding."""
        chunks = self.get_queryset()
        return chunks.with_embeddings()

    def without_embeddings(self):
        """Chunks that have no embedding."""
        chunks = self.get_queryset()
        return chunks.without_embeddings()

    def large_chunks(self, min_tokens: int = 500):
        """Chunks with at least ``min_tokens`` tokens."""
        chunks = self.get_queryset()
        return chunks.large_chunks(min_tokens)

    def small_chunks(self, max_tokens: int = 100):
        """Chunks with at most ``max_tokens`` tokens."""
        chunks = self.get_queryset()
        return chunks.small_chunks(max_tokens)

    def expensive_chunks(self, min_cost: float = 0.001):
        """Chunks whose embedding cost is at least ``min_cost``."""
        chunks = self.get_queryset()
        return chunks.expensive_chunks(min_cost)

    def get_chunk_statistics(self, user=None) -> Dict[str, Any]:
        """
        Aggregate token, character and cost figures over chunks.

        Args:
            user: Optional user filter

        Returns:
            Dictionary with count, sum, average, maximum and minimum
            aggregates over the selected chunks
        """
        chunks = self.get_queryset()
        if user:
            # NOTE(review): assumes the chunk model has a `user` field - confirm.
            chunks = chunks.filter(user=user)

        aggregates = {
            'total_chunks': Count('id'),
            'total_tokens': Sum('token_count'),
            'total_characters': Sum('character_count'),
            'total_cost': Sum('embedding_cost'),
            'avg_tokens': Avg('token_count'),
            'avg_characters': Avg('character_count'),
            'avg_cost': Avg('embedding_cost'),
            'max_tokens': models.Max('token_count'),
            'min_tokens': models.Min('token_count'),
        }
        return chunks.aggregate(**aggregates)