django-cfg 1.1.82__py3-none-any.whl → 1.2.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (244) hide show
  1. django_cfg/__init__.py +20 -448
  2. django_cfg/apps/accounts/README.md +3 -3
  3. django_cfg/apps/accounts/admin/__init__.py +0 -2
  4. django_cfg/apps/accounts/admin/activity.py +2 -9
  5. django_cfg/apps/accounts/admin/filters.py +0 -42
  6. django_cfg/apps/accounts/admin/inlines.py +8 -8
  7. django_cfg/apps/accounts/admin/otp.py +5 -5
  8. django_cfg/apps/accounts/admin/registration_source.py +1 -8
  9. django_cfg/apps/accounts/admin/user.py +12 -20
  10. django_cfg/apps/accounts/managers/user_manager.py +2 -129
  11. django_cfg/apps/accounts/migrations/0006_remove_twilioresponse_otp_secret_and_more.py +46 -0
  12. django_cfg/apps/accounts/models.py +3 -123
  13. django_cfg/apps/accounts/serializers/otp.py +40 -44
  14. django_cfg/apps/accounts/serializers/profile.py +0 -2
  15. django_cfg/apps/accounts/services/otp_service.py +98 -186
  16. django_cfg/apps/accounts/signals.py +25 -15
  17. django_cfg/apps/accounts/utils/auth_email_service.py +84 -0
  18. django_cfg/apps/accounts/views/otp.py +35 -36
  19. django_cfg/apps/agents/README.md +129 -0
  20. django_cfg/apps/agents/__init__.py +68 -0
  21. django_cfg/apps/agents/admin/__init__.py +17 -0
  22. django_cfg/apps/agents/admin/execution_admin.py +460 -0
  23. django_cfg/apps/agents/admin/registry_admin.py +360 -0
  24. django_cfg/apps/agents/admin/toolsets_admin.py +482 -0
  25. django_cfg/apps/agents/apps.py +29 -0
  26. django_cfg/apps/agents/core/__init__.py +20 -0
  27. django_cfg/apps/agents/core/agent.py +281 -0
  28. django_cfg/apps/agents/core/dependencies.py +154 -0
  29. django_cfg/apps/agents/core/exceptions.py +66 -0
  30. django_cfg/apps/agents/core/models.py +106 -0
  31. django_cfg/apps/agents/core/orchestrator.py +391 -0
  32. django_cfg/apps/agents/examples/__init__.py +3 -0
  33. django_cfg/apps/agents/examples/simple_example.py +161 -0
  34. django_cfg/apps/agents/integration/__init__.py +14 -0
  35. django_cfg/apps/agents/integration/middleware.py +80 -0
  36. django_cfg/apps/agents/integration/registry.py +345 -0
  37. django_cfg/apps/agents/integration/signals.py +50 -0
  38. django_cfg/apps/agents/management/__init__.py +3 -0
  39. django_cfg/apps/agents/management/commands/__init__.py +3 -0
  40. django_cfg/apps/agents/management/commands/create_agent.py +365 -0
  41. django_cfg/apps/agents/management/commands/orchestrator_status.py +191 -0
  42. django_cfg/apps/agents/managers/__init__.py +23 -0
  43. django_cfg/apps/agents/managers/execution.py +236 -0
  44. django_cfg/apps/agents/managers/registry.py +254 -0
  45. django_cfg/apps/agents/managers/toolsets.py +496 -0
  46. django_cfg/apps/agents/migrations/0001_initial.py +286 -0
  47. django_cfg/apps/agents/migrations/__init__.py +5 -0
  48. django_cfg/apps/agents/models/__init__.py +15 -0
  49. django_cfg/apps/agents/models/execution.py +215 -0
  50. django_cfg/apps/agents/models/registry.py +220 -0
  51. django_cfg/apps/agents/models/toolsets.py +305 -0
  52. django_cfg/apps/agents/patterns/__init__.py +24 -0
  53. django_cfg/apps/agents/patterns/content_agents.py +234 -0
  54. django_cfg/apps/agents/toolsets/__init__.py +15 -0
  55. django_cfg/apps/agents/toolsets/cache_toolset.py +285 -0
  56. django_cfg/apps/agents/toolsets/django_toolset.py +220 -0
  57. django_cfg/apps/agents/toolsets/file_toolset.py +324 -0
  58. django_cfg/apps/agents/toolsets/orm_toolset.py +319 -0
  59. django_cfg/apps/agents/urls.py +46 -0
  60. django_cfg/apps/knowbase/README.md +150 -0
  61. django_cfg/apps/knowbase/__init__.py +27 -0
  62. django_cfg/apps/knowbase/admin/__init__.py +23 -0
  63. django_cfg/apps/knowbase/admin/archive_admin.py +857 -0
  64. django_cfg/apps/knowbase/admin/chat_admin.py +386 -0
  65. django_cfg/apps/knowbase/admin/document_admin.py +650 -0
  66. django_cfg/apps/knowbase/admin/external_data_admin.py +685 -0
  67. django_cfg/apps/knowbase/apps.py +81 -0
  68. django_cfg/apps/knowbase/config/README.md +176 -0
  69. django_cfg/apps/knowbase/config/__init__.py +51 -0
  70. django_cfg/apps/knowbase/config/constance_fields.py +186 -0
  71. django_cfg/apps/knowbase/config/constance_settings.py +200 -0
  72. django_cfg/apps/knowbase/config/settings.py +450 -0
  73. django_cfg/apps/knowbase/examples/__init__.py +3 -0
  74. django_cfg/apps/knowbase/examples/external_data_usage.py +191 -0
  75. django_cfg/apps/knowbase/management/__init__.py +0 -0
  76. django_cfg/apps/knowbase/management/commands/__init__.py +0 -0
  77. django_cfg/apps/knowbase/management/commands/knowbase_stats.py +158 -0
  78. django_cfg/apps/knowbase/management/commands/setup_knowbase.py +59 -0
  79. django_cfg/apps/knowbase/managers/__init__.py +22 -0
  80. django_cfg/apps/knowbase/managers/archive.py +426 -0
  81. django_cfg/apps/knowbase/managers/base.py +32 -0
  82. django_cfg/apps/knowbase/managers/chat.py +141 -0
  83. django_cfg/apps/knowbase/managers/document.py +203 -0
  84. django_cfg/apps/knowbase/managers/external_data.py +471 -0
  85. django_cfg/apps/knowbase/migrations/0001_initial.py +427 -0
  86. django_cfg/apps/knowbase/migrations/0002_archiveitem_archiveitemchunk_documentarchive_and_more.py +434 -0
  87. django_cfg/apps/knowbase/migrations/__init__.py +5 -0
  88. django_cfg/apps/knowbase/mixins/__init__.py +15 -0
  89. django_cfg/apps/knowbase/mixins/config.py +108 -0
  90. django_cfg/apps/knowbase/mixins/creator.py +81 -0
  91. django_cfg/apps/knowbase/mixins/examples/vehicle_model_example.py +199 -0
  92. django_cfg/apps/knowbase/mixins/external_data_mixin.py +813 -0
  93. django_cfg/apps/knowbase/mixins/service.py +362 -0
  94. django_cfg/apps/knowbase/models/__init__.py +41 -0
  95. django_cfg/apps/knowbase/models/archive.py +599 -0
  96. django_cfg/apps/knowbase/models/base.py +58 -0
  97. django_cfg/apps/knowbase/models/chat.py +157 -0
  98. django_cfg/apps/knowbase/models/document.py +267 -0
  99. django_cfg/apps/knowbase/models/external_data.py +376 -0
  100. django_cfg/apps/knowbase/serializers/__init__.py +68 -0
  101. django_cfg/apps/knowbase/serializers/archive_serializers.py +386 -0
  102. django_cfg/apps/knowbase/serializers/chat_serializers.py +137 -0
  103. django_cfg/apps/knowbase/serializers/document_serializers.py +94 -0
  104. django_cfg/apps/knowbase/serializers/external_data_serializers.py +256 -0
  105. django_cfg/apps/knowbase/serializers/public_serializers.py +74 -0
  106. django_cfg/apps/knowbase/services/__init__.py +40 -0
  107. django_cfg/apps/knowbase/services/archive/__init__.py +42 -0
  108. django_cfg/apps/knowbase/services/archive/archive_service.py +541 -0
  109. django_cfg/apps/knowbase/services/archive/chunking_service.py +791 -0
  110. django_cfg/apps/knowbase/services/archive/exceptions.py +52 -0
  111. django_cfg/apps/knowbase/services/archive/extraction_service.py +508 -0
  112. django_cfg/apps/knowbase/services/archive/vectorization_service.py +362 -0
  113. django_cfg/apps/knowbase/services/base.py +53 -0
  114. django_cfg/apps/knowbase/services/chat_service.py +239 -0
  115. django_cfg/apps/knowbase/services/document_service.py +144 -0
  116. django_cfg/apps/knowbase/services/embedding/__init__.py +43 -0
  117. django_cfg/apps/knowbase/services/embedding/async_processor.py +244 -0
  118. django_cfg/apps/knowbase/services/embedding/batch_processor.py +250 -0
  119. django_cfg/apps/knowbase/services/embedding/batch_result.py +61 -0
  120. django_cfg/apps/knowbase/services/embedding/models.py +229 -0
  121. django_cfg/apps/knowbase/services/embedding/processors.py +148 -0
  122. django_cfg/apps/knowbase/services/embedding/utils.py +176 -0
  123. django_cfg/apps/knowbase/services/prompt_builder.py +191 -0
  124. django_cfg/apps/knowbase/services/search_service.py +293 -0
  125. django_cfg/apps/knowbase/signals/__init__.py +21 -0
  126. django_cfg/apps/knowbase/signals/archive_signals.py +211 -0
  127. django_cfg/apps/knowbase/signals/chat_signals.py +37 -0
  128. django_cfg/apps/knowbase/signals/document_signals.py +143 -0
  129. django_cfg/apps/knowbase/signals/external_data_signals.py +157 -0
  130. django_cfg/apps/knowbase/tasks/__init__.py +39 -0
  131. django_cfg/apps/knowbase/tasks/archive_tasks.py +316 -0
  132. django_cfg/apps/knowbase/tasks/document_processing.py +341 -0
  133. django_cfg/apps/knowbase/tasks/external_data_tasks.py +341 -0
  134. django_cfg/apps/knowbase/tasks/maintenance.py +195 -0
  135. django_cfg/apps/knowbase/urls.py +43 -0
  136. django_cfg/apps/knowbase/utils/__init__.py +12 -0
  137. django_cfg/apps/knowbase/utils/chunk_settings.py +261 -0
  138. django_cfg/apps/knowbase/utils/text_processing.py +375 -0
  139. django_cfg/apps/knowbase/utils/validation.py +99 -0
  140. django_cfg/apps/knowbase/views/__init__.py +28 -0
  141. django_cfg/apps/knowbase/views/archive_views.py +469 -0
  142. django_cfg/apps/knowbase/views/base.py +49 -0
  143. django_cfg/apps/knowbase/views/chat_views.py +181 -0
  144. django_cfg/apps/knowbase/views/document_views.py +183 -0
  145. django_cfg/apps/knowbase/views/public_views.py +129 -0
  146. django_cfg/apps/leads/admin.py +70 -0
  147. django_cfg/apps/newsletter/admin.py +234 -0
  148. django_cfg/apps/newsletter/admin_filters.py +124 -0
  149. django_cfg/apps/support/admin.py +196 -0
  150. django_cfg/apps/support/admin_filters.py +71 -0
  151. django_cfg/apps/support/templates/support/chat/ticket_chat.html +1 -1
  152. django_cfg/apps/urls.py +5 -4
  153. django_cfg/cli/README.md +1 -1
  154. django_cfg/cli/commands/create_project.py +2 -2
  155. django_cfg/cli/commands/info.py +1 -1
  156. django_cfg/config.py +44 -0
  157. django_cfg/core/config.py +29 -82
  158. django_cfg/core/environment.py +1 -1
  159. django_cfg/core/generation.py +19 -107
  160. django_cfg/{integration.py → core/integration.py} +18 -16
  161. django_cfg/core/validation.py +1 -1
  162. django_cfg/management/__init__.py +1 -1
  163. django_cfg/management/commands/__init__.py +1 -1
  164. django_cfg/management/commands/auto_generate.py +482 -0
  165. django_cfg/management/commands/migrator.py +19 -101
  166. django_cfg/management/commands/test_email.py +1 -1
  167. django_cfg/middleware/README.md +0 -158
  168. django_cfg/middleware/__init__.py +0 -2
  169. django_cfg/middleware/user_activity.py +3 -3
  170. django_cfg/models/api.py +145 -0
  171. django_cfg/models/base.py +287 -0
  172. django_cfg/models/cache.py +4 -4
  173. django_cfg/models/constance.py +25 -88
  174. django_cfg/models/database.py +9 -9
  175. django_cfg/models/drf.py +3 -36
  176. django_cfg/models/email.py +163 -0
  177. django_cfg/models/environment.py +276 -0
  178. django_cfg/models/limits.py +1 -1
  179. django_cfg/models/logging.py +366 -0
  180. django_cfg/models/revolution.py +41 -2
  181. django_cfg/models/security.py +125 -0
  182. django_cfg/models/services.py +1 -1
  183. django_cfg/modules/__init__.py +2 -56
  184. django_cfg/modules/base.py +78 -52
  185. django_cfg/modules/django_currency/service.py +2 -2
  186. django_cfg/modules/django_email.py +2 -2
  187. django_cfg/modules/django_health.py +267 -0
  188. django_cfg/modules/django_llm/llm/client.py +91 -19
  189. django_cfg/modules/django_llm/translator/translator.py +2 -2
  190. django_cfg/modules/django_logger.py +2 -2
  191. django_cfg/modules/django_ngrok.py +2 -2
  192. django_cfg/modules/django_tasks.py +68 -3
  193. django_cfg/modules/django_telegram.py +3 -3
  194. django_cfg/modules/django_twilio/sendgrid_service.py +2 -2
  195. django_cfg/modules/django_twilio/service.py +2 -2
  196. django_cfg/modules/django_twilio/simple_service.py +2 -2
  197. django_cfg/modules/django_twilio/twilio_service.py +2 -2
  198. django_cfg/modules/django_unfold/__init__.py +69 -0
  199. django_cfg/modules/{unfold → django_unfold}/callbacks.py +23 -22
  200. django_cfg/modules/django_unfold/dashboard.py +278 -0
  201. django_cfg/modules/django_unfold/icons/README.md +145 -0
  202. django_cfg/modules/django_unfold/icons/__init__.py +12 -0
  203. django_cfg/modules/django_unfold/icons/constants.py +2851 -0
  204. django_cfg/modules/django_unfold/icons/generate_icons.py +486 -0
  205. django_cfg/modules/django_unfold/models/__init__.py +42 -0
  206. django_cfg/modules/django_unfold/models/config.py +601 -0
  207. django_cfg/modules/django_unfold/models/dashboard.py +206 -0
  208. django_cfg/modules/django_unfold/models/dropdown.py +40 -0
  209. django_cfg/modules/django_unfold/models/navigation.py +73 -0
  210. django_cfg/modules/django_unfold/models/tabs.py +25 -0
  211. django_cfg/modules/{unfold → django_unfold}/system_monitor.py +2 -2
  212. django_cfg/modules/django_unfold/utils.py +140 -0
  213. django_cfg/registry/__init__.py +23 -0
  214. django_cfg/registry/core.py +61 -0
  215. django_cfg/registry/exceptions.py +11 -0
  216. django_cfg/registry/modules.py +12 -0
  217. django_cfg/registry/services.py +26 -0
  218. django_cfg/registry/third_party.py +52 -0
  219. django_cfg/routing/__init__.py +19 -0
  220. django_cfg/routing/callbacks.py +198 -0
  221. django_cfg/routing/routers.py +48 -0
  222. django_cfg/templates/admin/layouts/dashboard_with_tabs.html +8 -9
  223. django_cfg/templatetags/__init__.py +0 -0
  224. django_cfg/templatetags/django_cfg.py +33 -0
  225. django_cfg/urls.py +33 -0
  226. django_cfg/utils/path_resolution.py +1 -1
  227. django_cfg/utils/smart_defaults.py +7 -61
  228. django_cfg/utils/toolkit.py +663 -0
  229. {django_cfg-1.1.82.dist-info → django_cfg-1.2.1.dist-info}/METADATA +83 -86
  230. django_cfg-1.2.1.dist-info/RECORD +441 -0
  231. django_cfg/archive/django_sample.zip +0 -0
  232. django_cfg/models/unfold.py +0 -271
  233. django_cfg/modules/unfold/__init__.py +0 -29
  234. django_cfg/modules/unfold/dashboard.py +0 -318
  235. django_cfg/pyproject.toml +0 -370
  236. django_cfg/routers.py +0 -83
  237. django_cfg-1.1.82.dist-info/RECORD +0 -278
  238. /django_cfg/{exceptions.py → core/exceptions.py} +0 -0
  239. /django_cfg/modules/{unfold → django_unfold}/models.py +0 -0
  240. /django_cfg/modules/{unfold → django_unfold}/tailwind.py +0 -0
  241. /django_cfg/{version_check.py → utils/version_check.py} +0 -0
  242. {django_cfg-1.1.82.dist-info → django_cfg-1.2.1.dist-info}/WHEEL +0 -0
  243. {django_cfg-1.1.82.dist-info → django_cfg-1.2.1.dist-info}/entry_points.txt +0 -0
  244. {django_cfg-1.1.82.dist-info → django_cfg-1.2.1.dist-info}/licenses/LICENSE +0 -0
@@ -0,0 +1,158 @@
1
+ """
2
+ Knowledge Base statistics command.
3
+ """
4
+
5
+ from django.core.management.base import BaseCommand
6
+ from django.db.models import Count, Sum, Avg, Q, F, ExpressionWrapper
7
+ from django.db.models.functions import Extract
8
+ from django.contrib.auth import get_user_model
9
+ from django.db import models
10
+
11
+ User = get_user_model()
12
+
13
+
14
class Command(BaseCommand):
    """Display Knowledge Base statistics.

    Prints document and chat aggregates to ``self.stdout``. ``--user``
    restricts every query to rows owned by that user; ``--detailed``
    appends per-status, per-model, top-cost and last-7-days breakdowns.
    """

    help = 'Display Knowledge Base usage statistics'

    def add_arguments(self, parser):
        """Register the ``--user`` and ``--detailed`` command-line options."""
        parser.add_argument(
            '--user',
            type=str,
            help='Show statistics for specific user (username)',
        )
        parser.add_argument(
            '--detailed',
            action='store_true',
            help='Show detailed statistics',
        )

    def handle(self, *args, **options):
        """Print the summary statistics, then the detailed ones if requested.

        Returns early (after printing an error) when ``--user`` names a
        username that does not exist.
        """
        from ...models import Document, ChatSession

        self.stdout.write(
            self.style.SUCCESS('📊 Knowledge Base Statistics')
        )
        self.stdout.write('=' * 50)

        # Filter by user if specified; an empty dict means "all users".
        user_filter = {}
        if options['user']:
            try:
                user = User.objects.get(username=options['user'])
            except User.DoesNotExist:
                self.stdout.write(
                    self.style.ERROR(f"User '{options['user']}' not found")
                )
                return
            user_filter['user'] = user
            self.stdout.write(f"👤 User: {user.username}")
            self.stdout.write('-' * 30)

        # Document statistics.
        # Processing duration (seconds) is computed in the database as the
        # epoch of (completed_at - started_at).
        processing_time_expr = ExpressionWrapper(
            Extract(F('processing_completed_at') - F('processing_started_at'), 'epoch'),
            output_field=models.FloatField()
        )

        doc_stats = Document.objects.filter(**user_filter).aggregate(
            total_docs=Count('id'),
            completed_docs=Count('id', filter=Q(processing_status='completed')),
            total_chunks=Sum('chunks_count'),
            total_tokens=Sum('total_tokens'),
            total_cost=Sum('total_cost_usd'),
            # Only rows that have both timestamps contribute to the average.
            avg_processing_time=Avg(processing_time_expr, filter=Q(
                processing_started_at__isnull=False,
                processing_completed_at__isnull=False
            ))
        )

        # Aggregates over an empty set come back as None, hence the ``or 0``.
        total_docs = doc_stats['total_docs'] or 0
        completed_docs = doc_stats['completed_docs'] or 0
        # ``or 1`` avoids a division by zero when there are no documents.
        success_rate = completed_docs / (total_docs or 1) * 100

        self.stdout.write("📄 Documents:")
        self.stdout.write(f" Total: {total_docs}")
        self.stdout.write(f" Completed: {completed_docs}")
        self.stdout.write(f" Success Rate: {success_rate:.1f}%")
        # The average was previously computed but never reported.
        avg_processing_time = doc_stats['avg_processing_time']
        if avg_processing_time is not None:
            self.stdout.write(f" Avg Processing Time: {avg_processing_time:.2f}s")

        self.stdout.write("\n📝 Content:")
        self.stdout.write(f" Total Chunks: {doc_stats['total_chunks'] or 0}")
        self.stdout.write(f" Total Tokens: {doc_stats['total_tokens'] or 0}")

        self.stdout.write("\n💰 Costs:")
        self.stdout.write(f" Total Cost: ${(doc_stats['total_cost'] or 0):.6f}")

        # Chat statistics
        chat_stats = ChatSession.objects.filter(**user_filter).aggregate(
            total_sessions=Count('id'),
            active_sessions=Count('id', filter=Q(is_active=True)),
            total_messages=Sum('messages_count'),
            total_chat_tokens=Sum('total_tokens_used'),
            total_chat_cost=Sum('total_cost_usd')
        )

        self.stdout.write("\n💬 Chat:")
        self.stdout.write(f" Total Sessions: {chat_stats['total_sessions'] or 0}")
        self.stdout.write(f" Active Sessions: {chat_stats['active_sessions'] or 0}")
        self.stdout.write(f" Total Messages: {chat_stats['total_messages'] or 0}")
        self.stdout.write(f" Chat Tokens: {chat_stats['total_chat_tokens'] or 0}")
        self.stdout.write(f" Chat Cost: ${(chat_stats['total_chat_cost'] or 0):.6f}")

        # Detailed statistics
        if options['detailed']:
            self.show_detailed_stats(user_filter)

    def show_detailed_stats(self, user_filter):
        """Print the detailed breakdowns.

        Args:
            user_filter: Keyword filters (either empty or ``{'user': user}``)
                applied to every Document and ChatSession query.
        """
        from datetime import timedelta

        from django.utils import timezone

        from ...models import Document, ChatSession

        self.stdout.write("\n" + "=" * 50)
        self.stdout.write("📈 Detailed Statistics")
        self.stdout.write("=" * 50)

        # Processing status breakdown; explicit ordering keeps the output stable.
        status_counts = Document.objects.filter(**user_filter).values(
            'processing_status'
        ).annotate(count=Count('id')).order_by('processing_status')

        self.stdout.write("\n📊 Document Status Breakdown:")
        for status in status_counts:
            self.stdout.write(f" {status['processing_status']}: {status['count']}")

        # Model usage breakdown
        model_counts = ChatSession.objects.filter(**user_filter).values(
            'model_name'
        ).annotate(count=Count('id')).order_by('model_name')

        self.stdout.write("\n🤖 Model Usage:")
        for model in model_counts:
            self.stdout.write(f" {model['model_name']}: {model['count']} sessions")

        # Top documents by cost
        top_docs = Document.objects.filter(
            **user_filter
        ).order_by('-total_cost_usd')[:5]

        self.stdout.write("\n💸 Most Expensive Documents:")
        for doc in top_docs:
            # Add the ellipsis only when the title was actually truncated.
            title = doc.title if len(doc.title) <= 40 else f"{doc.title[:40]}..."
            # Same ``or 0`` guard as the summary, in case the cost is unset.
            self.stdout.write(f" {title}: ${(doc.total_cost_usd or 0):.6f}")

        # Recent activity
        week_ago = timezone.now() - timedelta(days=7)
        recent_docs = Document.objects.filter(
            **user_filter,
            created_at__gte=week_ago
        ).count()

        recent_sessions = ChatSession.objects.filter(
            **user_filter,
            created_at__gte=week_ago
        ).count()

        self.stdout.write("\n📅 Recent Activity (Last 7 Days):")
        self.stdout.write(f" New Documents: {recent_docs}")
        self.stdout.write(f" New Chat Sessions: {recent_sessions}")
@@ -0,0 +1,59 @@
1
+ """
2
+ Setup command for Knowledge Base application.
3
+ """
4
+
5
+ from django.core.management.base import BaseCommand
6
+ from django.db import connection
7
+ from django.core.management import call_command
8
+
9
+
10
class Command(BaseCommand):
    """Setup Knowledge Base by creating the required PostgreSQL extensions."""

    # NOTE(review): the help text mentions running migrations, but no step in
    # this command runs them — confirm whether a migrate step is intended.
    help = 'Setup Knowledge Base with pgvector extension and run migrations'

    def add_arguments(self, parser):
        """Register the ``--skip-extensions`` flag."""
        parser.add_argument(
            '--skip-extensions',
            action='store_true',
            help='Skip creating PostgreSQL extensions',
        )

    def handle(self, *args, **options):
        """Execute the setup process and report whether it succeeded."""
        self.stdout.write(
            self.style.SUCCESS('🚀 Setting up Knowledge Base...')
        )

        # Step 1: Create PostgreSQL extensions
        extensions_ok = True
        if not options['skip_extensions']:
            # Only an explicit ``False`` counts as failure, so overrides that
            # still return ``None`` keep being treated as success.
            extensions_ok = self.create_extensions() is not False

        if extensions_ok:
            self.stdout.write(
                self.style.SUCCESS('✅ Knowledge Base setup completed!')
            )
        else:
            # Do not claim success when the extensions could not be created.
            self.stdout.write(
                self.style.WARNING('⚠️ Knowledge Base setup finished, but extensions were not created')
            )

    def create_extensions(self):
        """Create required PostgreSQL extensions.

        Runs ``CREATE EXTENSION IF NOT EXISTS`` for ``vector`` and ``pg_trgm``
        on the default connection. Failures are reported on stdout rather
        than raised.

        Returns:
            bool: True when both statements succeeded, False otherwise.
        """
        self.stdout.write('📦 Creating PostgreSQL extensions...')

        # CREATE EXTENSION is PostgreSQL-specific; give a clear message
        # instead of a raw SQL error on other backends.
        if connection.vendor != 'postgresql':
            self.stdout.write(
                self.style.ERROR(
                    f" ❌ Failed to create extensions: database vendor is '{connection.vendor}', not PostgreSQL"
                )
            )
            return False

        try:
            with connection.cursor() as cursor:
                # Create pgvector extension
                cursor.execute("CREATE EXTENSION IF NOT EXISTS vector;")
                self.stdout.write(' ✓ pgvector extension created')

                # Create pg_trgm for text search
                cursor.execute("CREATE EXTENSION IF NOT EXISTS pg_trgm;")
                self.stdout.write(' ✓ pg_trgm extension created')
        except Exception as e:
            # Best-effort: report and let the caller decide what to print.
            self.stdout.write(
                self.style.ERROR(f' ❌ Failed to create extensions: {e}')
            )
            self.stdout.write(
                self.style.WARNING(' ⚠️ You may need to create extensions manually as superuser')
            )
            return False

        return True
59
+
@@ -0,0 +1,22 @@
1
+ """
2
+ Custom managers for knowledge base models.
3
+ """
4
+
5
+ from .base import *
6
+ from .document import *
7
+ from .chat import *
8
+ from .archive import *
9
+ from .external_data import *
10
+
11
+ __all__ = [
12
+ 'BaseKnowbaseManager',
13
+ 'DocumentManager',
14
+ 'DocumentChunkManager',
15
+ 'ChatSessionManager',
16
+ 'ChatMessageManager',
17
+ 'DocumentArchiveManager',
18
+ 'ArchiveItemManager',
19
+ 'ArchiveItemChunkManager',
20
+ 'ExternalDataManager',
21
+ 'ExternalDataChunkManager',
22
+ ]
@@ -0,0 +1,426 @@
1
+ """
2
+ Archive managers for document archive models.
3
+ """
4
+
5
+ import logging
6
+ from django.db import models, transaction
7
+ from django.db.models import Count, Sum, Avg, Q
8
+ from typing import Optional, List, Dict, Any
9
+ from django.contrib.auth import get_user_model
10
+
11
+ User = get_user_model()
12
+
13
+
14
class DocumentArchiveManager(models.Manager):
    """Custom manager for DocumentArchive model."""

    def for_user(self, user):
        """Explicitly filter by specific user."""
        return self.get_queryset().filter(user=user)

    def all_users(self):
        """Get unfiltered queryset (admin use)."""
        return self.get_queryset()

    def processed(self):
        """Get only processed archives."""
        from ..models.base import ProcessingStatus
        return self.get_queryset().filter(
            processing_status=ProcessingStatus.COMPLETED
        )

    def pending_processing(self):
        """Get archives pending processing."""
        from ..models.base import ProcessingStatus
        return self.get_queryset().filter(
            processing_status=ProcessingStatus.PENDING
        )

    def failed_processing(self):
        """Get archives that failed processing."""
        from ..models.base import ProcessingStatus
        return self.get_queryset().filter(
            processing_status=ProcessingStatus.FAILED
        )

    def by_content_hash(self, content_hash: str):
        """Find archives by content hash."""
        return self.get_queryset().filter(content_hash=content_hash)

    def by_archive_type(self, archive_type: str):
        """Get archives by type (zip, tar, etc.)."""
        return self.get_queryset().filter(archive_type=archive_type)

    def with_stats(self):
        """Get archives with ``user`` joined and ``items``, ``chunks`` and
        ``categories`` prefetched."""
        return self.get_queryset().select_related('user').prefetch_related(
            'items', 'chunks', 'categories'
        )

    def get_processing_statistics(self, user=None) -> Dict[str, Any]:
        """Get archive processing statistics.

        Args:
            user: When truthy, restrict the statistics to this user's
                archives; otherwise aggregate over all archives.

        Returns:
            Dict with the aggregate keys ``total_archives``,
            ``processed_archives``, ``failed_archives``, ``total_items``,
            ``total_chunks``, ``total_tokens``, ``total_cost`` and
            ``avg_processing_time``, plus ``avg_items_per_archive`` and
            ``avg_chunks_per_archive``. ``Sum``/``Avg`` values are ``None``
            when no rows match.
        """
        queryset = self.for_user(user) if user else self.get_queryset()

        # Get basic statistics
        stats = queryset.aggregate(
            total_archives=Count('id'),
            processed_archives=Count('id', filter=Q(processing_status='completed')),
            failed_archives=Count('id', filter=Q(processing_status='failed')),
            total_items=Sum('total_items'),
            total_chunks=Sum('total_chunks'),
            total_tokens=Sum('total_tokens'),
            total_cost=Sum('total_cost_usd'),
            avg_processing_time=Avg('processing_duration_ms')
        )

        # Calculate averages manually to avoid aggregate on aggregate error
        total_archives = stats['total_archives']
        if total_archives:
            stats['avg_items_per_archive'] = (stats['total_items'] or 0) / total_archives
            stats['avg_chunks_per_archive'] = (stats['total_chunks'] or 0) / total_archives
        else:
            stats['avg_items_per_archive'] = 0
            stats['avg_chunks_per_archive'] = 0

        return stats

    def check_duplicate_before_save(self, user, title, file_size, exclude_id=None):
        """Check for duplicate archive before saving.

        An archive counts as a duplicate when another row has the same
        ``user``, ``title`` and ``file_size``.

        Args:
            user: Owner to match.
            title: Title to match; a falsy title disables the check.
            file_size: Size to match; a falsy size disables the check.
            exclude_id: Optional primary key to ignore (the row being edited).

        Returns:
            Tuple ``(is_duplicate, existing_archive)``; ``existing_archive``
            is ``None`` when no duplicate was found.
        """
        if not title or not file_size:
            return False, None

        # Use all_users() to bypass user filtering
        query = self.all_users().filter(
            user=user,
            title=title,
            file_size=file_size
        )

        if exclude_id:
            query = query.exclude(pk=exclude_id)

        existing_archive = query.first()
        return existing_archive is not None, existing_archive

    def reprocess(self, archive_id: str) -> bool:
        """
        Reset and reprocess an archive.

        The archive is first marked PROCESSING under a row lock, then its
        items and chunks are deleted, its counters are reset, it is saved as
        PENDING and the processing task is queued.

        Args:
            archive_id: ID of the archive to reprocess

        Returns:
            bool: True if reprocessing was initiated successfully

        Raises:
            ValueError: If the archive is not found, has no file, is already
                being processed, or its existing items/chunks could not be
                cleared.
        """
        import time

        # Imported before the ``try`` so that the ``except`` clause below can
        # always resolve ``DocumentArchive.DoesNotExist``.
        from ..models.archive import ArchiveItem, ArchiveItemChunk, DocumentArchive
        from ..models.base import ProcessingStatus
        from ..tasks.archive_tasks import process_archive_task

        logger = logging.getLogger(__name__)

        try:
            archive = DocumentArchive.objects.get(pk=archive_id)
        except DocumentArchive.DoesNotExist as exc:
            raise ValueError(f"Archive with ID {archive_id} not found") from exc

        # Check if archive has a file
        if not archive.archive_file:
            raise ValueError("Archive has no file to process")

        # Check if archive is already being processed
        if archive.processing_status == ProcessingStatus.PROCESSING:
            raise ValueError(f"Archive {archive.id} is already being processed")

        # Set processing status immediately to prevent concurrent reprocessing.
        # The row is re-read with select_for_update so the status check and
        # the write happen under the same lock.
        with transaction.atomic():
            archive = DocumentArchive.objects.select_for_update().get(pk=archive_id)
            if archive.processing_status == ProcessingStatus.PROCESSING:
                raise ValueError(f"Archive {archive.id} is already being processed by another process")

            archive.processing_status = ProcessingStatus.PROCESSING
            archive.save(update_fields=['processing_status'])
            logger.info(f"🔒 Locked archive {archive.id} for reprocessing")

        logger.info(f"🔄 Starting reprocessing for archive {archive.id} ({archive.title})")

        # Reset processing status and clear error (in memory; persisted by
        # the full save() further down).
        archive.processing_status = ProcessingStatus.PENDING
        archive.processing_error = ""
        archive.processing_duration_ms = 0
        archive.processed_at = None

        # Count existing records first
        items_count = ArchiveItem.objects.filter(archive=archive).count()
        chunks_count = ArchiveItemChunk.objects.filter(item__archive=archive).count()

        logger.info(f"🗑️ Found {items_count} items and {chunks_count} chunks to delete")

        if items_count > 0 or chunks_count > 0:
            # Delete in separate transaction to ensure complete removal before new processing
            try:
                with transaction.atomic():
                    # Delete chunks first (foreign key dependency)
                    chunks_deleted, _ = ArchiveItemChunk.objects.filter(item__archive=archive).delete()

                    # Delete items
                    items_deleted, _ = ArchiveItem.objects.filter(archive=archive).delete()

                # Verify deletion outside transaction with retry logic
                max_retries = 3
                for retry in range(max_retries):
                    remaining_items = ArchiveItem.objects.filter(archive=archive).count()
                    remaining_chunks = ArchiveItemChunk.objects.filter(item__archive=archive).count()

                    if remaining_items == 0 and remaining_chunks == 0:
                        break

                    if retry < max_retries - 1:
                        logger.warning(f"⚠️ Retry {retry + 1}: Still {remaining_items} items and {remaining_chunks} chunks remaining, waiting...")
                        time.sleep(0.2)
                    else:
                        logger.error(f"❌ Failed to delete all records after {max_retries} retries! {remaining_items} items and {remaining_chunks} chunks still remain")
                        raise ValueError(f"Failed to clear existing archive data after {max_retries} retries. {remaining_items} items and {remaining_chunks} chunks still exist.")

                logger.info(f"🗑️ Successfully deleted {items_deleted} items and {chunks_deleted} chunks")

            except Exception as e:
                logger.error(f"❌ Error during deletion: {e}")
                # Record the failure on the archive, then propagate.
                archive.processing_status = ProcessingStatus.FAILED
                archive.processing_error = f"Failed to clear existing data: {str(e)}"
                archive.save(update_fields=['processing_status', 'processing_error'])
                raise
        else:
            logger.info("🗑️ No existing records to delete")

        # Reset statistics
        archive.total_items = 0
        archive.processed_items = 0
        archive.total_chunks = 0
        archive.vectorized_chunks = 0
        archive.total_cost_usd = 0.0

        archive.save()
        logger.info(f"💾 Archive {archive.id} reset to PENDING status")

        # Trigger reprocessing directly via task
        process_archive_task.send(str(archive.id), str(archive.user.id))
        logger.info(f"🚀 Queued reprocessing task for archive {archive.id}")

        return True

    def get_vectorization_progress(self, archive_id):
        """Get vectorization progress for an archive.

        A chunk counts as vectorized when its ``embedding`` is not ``None``
        and contains at least one non-zero component.

        Args:
            archive_id: ID of the archive whose chunks are inspected.

        Returns:
            Dict with ``total`` (chunk count), ``vectorized`` (vectorized
            chunk count) and ``percentage`` (rounded to one decimal). On any
            error the error is logged and all three values are 0.
        """
        try:
            from ..models.archive import ArchiveItemChunk

            # Get all chunks for this archive
            chunks_qs = ArchiveItemChunk.objects.filter(archive_id=archive_id)

            total = chunks_qs.count()

            # An all-zero (or empty) embedding is treated as "not vectorized";
            # any() over an empty embedding is already False.
            vectorized = 0
            if total > 0:
                vectorized = sum(
                    1
                    for chunk in chunks_qs.only('embedding')
                    if chunk.embedding is not None
                    and any(x != 0.0 for x in chunk.embedding)
                )

            return {
                'total': total,
                'vectorized': vectorized,
                'percentage': round((vectorized / total * 100) if total > 0 else 0, 1)
            }
        except Exception as e:
            # Best-effort: callers get zeros rather than an exception.
            logger = logging.getLogger(__name__)
            logger.error(
                f"Error getting vectorization progress for archive {archive_id}: {e}",
                exc_info=True,
            )
            return {
                'total': 0,
                'vectorized': 0,
                'percentage': 0
            }
258
+
259
+
260
class ArchiveItemManager(models.Manager):
    """Manager with query shortcuts and simple statistics for ArchiveItem."""

    def for_user(self, user):
        """Return the items whose ``user`` is the given user."""
        items = self.get_queryset()
        return items.filter(user=user)

    def all_users(self):
        """Return the base queryset without any user filter (admin use)."""
        unfiltered = self.get_queryset()
        return unfiltered

    def for_archive(self, archive_id: str):
        """Return the items whose ``archive_id`` equals the given id."""
        items = self.get_queryset()
        return items.filter(archive_id=archive_id)

    def by_content_type(self, content_type: str):
        """Return the items whose ``content_type`` equals the given value."""
        lookup = {'content_type': content_type}
        return self.get_queryset().filter(**lookup)

    def by_language(self, language: str):
        """Return the items whose ``language`` equals the given value."""
        lookup = {'language': language}
        return self.get_queryset().filter(**lookup)

    def processable(self):
        """Return the items flagged with ``is_processable=True``."""
        items = self.get_queryset()
        return items.filter(is_processable=True)

    def code_files(self):
        """Return the items whose ``content_type`` is ``'code'``."""
        wanted = 'code'
        return self.get_queryset().filter(content_type=wanted)

    def document_files(self):
        """Return the items whose ``content_type`` is ``'document'``."""
        wanted = 'document'
        return self.get_queryset().filter(content_type=wanted)

    def data_files(self):
        """Return the items whose ``content_type`` is ``'data'``."""
        wanted = 'data'
        return self.get_queryset().filter(content_type=wanted)

    def with_chunks(self):
        """Return all items with the ``chunks`` relation prefetched."""
        items = self.get_queryset()
        return items.prefetch_related('chunks')

    def get_content_type_distribution(self, archive_id: Optional[str] = None) -> Dict[str, int]:
        """Count items per ``content_type``.

        Args:
            archive_id: When truthy, only items of this archive are counted.

        Returns:
            Mapping of content type to the number of items with that type.
        """
        items = self.get_queryset()
        if archive_id:
            items = items.filter(archive_id=archive_id)

        grouped = items.values('content_type').annotate(count=Count('id'))
        pairs = grouped.values_list('content_type', 'count')
        return {content_type: count for content_type, count in pairs}

    def get_language_distribution(self, archive_id: Optional[str] = None) -> Dict[str, int]:
        """Count items per ``language``, ignoring null and empty languages.

        Args:
            archive_id: When truthy, only items of this archive are counted.

        Returns:
            Mapping of language to the number of items with that language.
        """
        items = self.get_queryset().filter(language__isnull=False)
        items = items.exclude(language='')
        if archive_id:
            items = items.filter(archive_id=archive_id)

        grouped = items.values('language').annotate(count=Count('id'))
        pairs = grouped.values_list('language', 'count')
        return {language: count for language, count in pairs}
326
+
327
+
328
+ class ArchiveItemChunkManager(models.Manager):
329
+ """Custom manager for ArchiveItemChunk model."""
330
+
331
def for_user(self, user):
    """Return the chunks whose ``user`` is the given user."""
    chunks = self.get_queryset()
    return chunks.filter(user=user)
334
+
335
def all_users(self):
    """Return the base queryset without any user filter (admin use)."""
    unfiltered = self.get_queryset()
    return unfiltered
338
+
339
def for_archive(self, archive_id: str):
    """Return the chunks whose ``archive_id`` equals the given id."""
    lookup = {'archive_id': archive_id}
    return self.get_queryset().filter(**lookup)
342
+
343
def for_item(self, item_id: str):
    """Return the chunks whose ``item_id`` equals the given id."""
    chunks = self.get_queryset()
    return chunks.filter(item_id=item_id)
346
+
347
def by_chunk_type(self, chunk_type: str):
    """Return the chunks whose ``chunk_type`` equals the given value."""
    lookup = {'chunk_type': chunk_type}
    return self.get_queryset().filter(**lookup)
350
+
351
def vectorized(self):
    """Return the chunks whose ``embedding`` is not null."""
    chunks = self.get_queryset()
    return chunks.filter(embedding__isnull=False)
354
+
355
def pending_vectorization(self):
    """Return the chunks whose ``embedding`` is null."""
    chunks = self.get_queryset()
    return chunks.filter(embedding__isnull=True)
358
+
359
def by_content_type(self, content_type: str):
    """Return the chunks whose parent item has the given ``content_type``."""
    lookup = {'item__content_type': content_type}
    return self.get_queryset().filter(**lookup)
362
+
363
def by_language(self, language: str):
    """Return the chunks whose parent item has the given ``language``."""
    lookup = {'item__language': language}
    return self.get_queryset().filter(**lookup)
366
+
367
def semantic_search(
    self,
    query_embedding: List[float],
    limit: int = 5,
    similarity_threshold: float = 0.7,
    content_types: Optional[List[str]] = None,
    languages: Optional[List[str]] = None,
    chunk_types: Optional[List[str]] = None
):
    """Rank chunks by cosine similarity to ``query_embedding``.

    Only chunks with a non-null ``embedding`` are considered. Each optional
    list narrows the candidates when it is truthy; falsy values (``None`` or
    an empty list) leave that dimension unfiltered.

    Args:
        query_embedding: Vector the stored embeddings are compared against.
        limit: Maximum number of rows in the returned slice.
        similarity_threshold: Minimum ``similarity`` a chunk must reach.
        content_types: Allowed ``item.content_type`` values.
        languages: Allowed ``item.language`` values.
        chunk_types: Allowed ``chunk_type`` values.

    Returns:
        A sliced queryset annotated with ``similarity``
        (``1 - cosine distance``), ordered from most to least similar.
    """
    from pgvector.django import CosineDistance

    candidates = self.get_queryset().filter(embedding__isnull=False)

    # Lookup name paired with its optional value list; applied in this order.
    optional_filters = (
        ('item__content_type__in', content_types),
        ('item__language__in', languages),
        ('chunk_type__in', chunk_types),
    )
    for lookup, allowed in optional_filters:
        if allowed:
            candidates = candidates.filter(**{lookup: allowed})

    similarity_expr = 1 - CosineDistance('embedding', query_embedding)
    scored = candidates.annotate(similarity=similarity_expr)
    close_enough = scored.filter(similarity__gte=similarity_threshold)
    return close_enough.order_by('-similarity')[:limit]
396
+
397
def with_context(self):
    """Return all chunks with ``archive`` and ``item`` loaded via select_related."""
    chunks = self.get_queryset()
    return chunks.select_related('archive', 'item')
400
+
401
def get_vectorization_statistics(self, archive_id: Optional[str] = None) -> Dict[str, Any]:
    """Aggregate chunk counts, token totals and embedding costs.

    Args:
        archive_id: When truthy, only chunks of this archive are aggregated.

    Returns:
        The result of ``QuerySet.aggregate`` with the keys ``total_chunks``,
        ``vectorized_chunks`` (chunks with a non-null embedding),
        ``total_tokens``, ``total_cost``, ``avg_tokens_per_chunk`` and
        ``avg_cost_per_chunk``.
    """
    scope = self.get_queryset()
    if archive_id:
        scope = scope.filter(archive_id=archive_id)

    has_embedding = Q(embedding__isnull=False)
    aggregates = {
        'total_chunks': Count('id'),
        'vectorized_chunks': Count('id', filter=has_embedding),
        'total_tokens': Sum('token_count'),
        'total_cost': Sum('embedding_cost'),
        'avg_tokens_per_chunk': Avg('token_count'),
        'avg_cost_per_chunk': Avg('embedding_cost'),
    }
    return scope.aggregate(**aggregates)
415
+
416
def get_chunk_type_distribution(self, archive_id: Optional[str] = None) -> Dict[str, int]:
    """Count chunks per ``chunk_type``.

    Args:
        archive_id: When truthy, only chunks of this archive are counted.

    Returns:
        Mapping of chunk type to the number of chunks with that type.
    """
    scope = self.get_queryset()
    if archive_id:
        scope = scope.filter(archive_id=archive_id)

    grouped = scope.values('chunk_type').annotate(count=Count('id'))
    pairs = grouped.values_list('chunk_type', 'count')
    return {chunk_type: count for chunk_type, count in pairs}