django-cfg 1.1.82__py3-none-any.whl → 1.2.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (244):
  1. django_cfg/__init__.py +20 -448
  2. django_cfg/apps/accounts/README.md +3 -3
  3. django_cfg/apps/accounts/admin/__init__.py +0 -2
  4. django_cfg/apps/accounts/admin/activity.py +2 -9
  5. django_cfg/apps/accounts/admin/filters.py +0 -42
  6. django_cfg/apps/accounts/admin/inlines.py +8 -8
  7. django_cfg/apps/accounts/admin/otp.py +5 -5
  8. django_cfg/apps/accounts/admin/registration_source.py +1 -8
  9. django_cfg/apps/accounts/admin/user.py +12 -20
  10. django_cfg/apps/accounts/managers/user_manager.py +2 -129
  11. django_cfg/apps/accounts/migrations/0006_remove_twilioresponse_otp_secret_and_more.py +46 -0
  12. django_cfg/apps/accounts/models.py +3 -123
  13. django_cfg/apps/accounts/serializers/otp.py +40 -44
  14. django_cfg/apps/accounts/serializers/profile.py +0 -2
  15. django_cfg/apps/accounts/services/otp_service.py +98 -186
  16. django_cfg/apps/accounts/signals.py +25 -15
  17. django_cfg/apps/accounts/utils/auth_email_service.py +84 -0
  18. django_cfg/apps/accounts/views/otp.py +35 -36
  19. django_cfg/apps/agents/README.md +129 -0
  20. django_cfg/apps/agents/__init__.py +68 -0
  21. django_cfg/apps/agents/admin/__init__.py +17 -0
  22. django_cfg/apps/agents/admin/execution_admin.py +460 -0
  23. django_cfg/apps/agents/admin/registry_admin.py +360 -0
  24. django_cfg/apps/agents/admin/toolsets_admin.py +482 -0
  25. django_cfg/apps/agents/apps.py +29 -0
  26. django_cfg/apps/agents/core/__init__.py +20 -0
  27. django_cfg/apps/agents/core/agent.py +281 -0
  28. django_cfg/apps/agents/core/dependencies.py +154 -0
  29. django_cfg/apps/agents/core/exceptions.py +66 -0
  30. django_cfg/apps/agents/core/models.py +106 -0
  31. django_cfg/apps/agents/core/orchestrator.py +391 -0
  32. django_cfg/apps/agents/examples/__init__.py +3 -0
  33. django_cfg/apps/agents/examples/simple_example.py +161 -0
  34. django_cfg/apps/agents/integration/__init__.py +14 -0
  35. django_cfg/apps/agents/integration/middleware.py +80 -0
  36. django_cfg/apps/agents/integration/registry.py +345 -0
  37. django_cfg/apps/agents/integration/signals.py +50 -0
  38. django_cfg/apps/agents/management/__init__.py +3 -0
  39. django_cfg/apps/agents/management/commands/__init__.py +3 -0
  40. django_cfg/apps/agents/management/commands/create_agent.py +365 -0
  41. django_cfg/apps/agents/management/commands/orchestrator_status.py +191 -0
  42. django_cfg/apps/agents/managers/__init__.py +23 -0
  43. django_cfg/apps/agents/managers/execution.py +236 -0
  44. django_cfg/apps/agents/managers/registry.py +254 -0
  45. django_cfg/apps/agents/managers/toolsets.py +496 -0
  46. django_cfg/apps/agents/migrations/0001_initial.py +286 -0
  47. django_cfg/apps/agents/migrations/__init__.py +5 -0
  48. django_cfg/apps/agents/models/__init__.py +15 -0
  49. django_cfg/apps/agents/models/execution.py +215 -0
  50. django_cfg/apps/agents/models/registry.py +220 -0
  51. django_cfg/apps/agents/models/toolsets.py +305 -0
  52. django_cfg/apps/agents/patterns/__init__.py +24 -0
  53. django_cfg/apps/agents/patterns/content_agents.py +234 -0
  54. django_cfg/apps/agents/toolsets/__init__.py +15 -0
  55. django_cfg/apps/agents/toolsets/cache_toolset.py +285 -0
  56. django_cfg/apps/agents/toolsets/django_toolset.py +220 -0
  57. django_cfg/apps/agents/toolsets/file_toolset.py +324 -0
  58. django_cfg/apps/agents/toolsets/orm_toolset.py +319 -0
  59. django_cfg/apps/agents/urls.py +46 -0
  60. django_cfg/apps/knowbase/README.md +150 -0
  61. django_cfg/apps/knowbase/__init__.py +27 -0
  62. django_cfg/apps/knowbase/admin/__init__.py +23 -0
  63. django_cfg/apps/knowbase/admin/archive_admin.py +857 -0
  64. django_cfg/apps/knowbase/admin/chat_admin.py +386 -0
  65. django_cfg/apps/knowbase/admin/document_admin.py +650 -0
  66. django_cfg/apps/knowbase/admin/external_data_admin.py +685 -0
  67. django_cfg/apps/knowbase/apps.py +81 -0
  68. django_cfg/apps/knowbase/config/README.md +176 -0
  69. django_cfg/apps/knowbase/config/__init__.py +51 -0
  70. django_cfg/apps/knowbase/config/constance_fields.py +186 -0
  71. django_cfg/apps/knowbase/config/constance_settings.py +200 -0
  72. django_cfg/apps/knowbase/config/settings.py +450 -0
  73. django_cfg/apps/knowbase/examples/__init__.py +3 -0
  74. django_cfg/apps/knowbase/examples/external_data_usage.py +191 -0
  75. django_cfg/apps/knowbase/management/__init__.py +0 -0
  76. django_cfg/apps/knowbase/management/commands/__init__.py +0 -0
  77. django_cfg/apps/knowbase/management/commands/knowbase_stats.py +158 -0
  78. django_cfg/apps/knowbase/management/commands/setup_knowbase.py +59 -0
  79. django_cfg/apps/knowbase/managers/__init__.py +22 -0
  80. django_cfg/apps/knowbase/managers/archive.py +426 -0
  81. django_cfg/apps/knowbase/managers/base.py +32 -0
  82. django_cfg/apps/knowbase/managers/chat.py +141 -0
  83. django_cfg/apps/knowbase/managers/document.py +203 -0
  84. django_cfg/apps/knowbase/managers/external_data.py +471 -0
  85. django_cfg/apps/knowbase/migrations/0001_initial.py +427 -0
  86. django_cfg/apps/knowbase/migrations/0002_archiveitem_archiveitemchunk_documentarchive_and_more.py +434 -0
  87. django_cfg/apps/knowbase/migrations/__init__.py +5 -0
  88. django_cfg/apps/knowbase/mixins/__init__.py +15 -0
  89. django_cfg/apps/knowbase/mixins/config.py +108 -0
  90. django_cfg/apps/knowbase/mixins/creator.py +81 -0
  91. django_cfg/apps/knowbase/mixins/examples/vehicle_model_example.py +199 -0
  92. django_cfg/apps/knowbase/mixins/external_data_mixin.py +813 -0
  93. django_cfg/apps/knowbase/mixins/service.py +362 -0
  94. django_cfg/apps/knowbase/models/__init__.py +41 -0
  95. django_cfg/apps/knowbase/models/archive.py +599 -0
  96. django_cfg/apps/knowbase/models/base.py +58 -0
  97. django_cfg/apps/knowbase/models/chat.py +157 -0
  98. django_cfg/apps/knowbase/models/document.py +267 -0
  99. django_cfg/apps/knowbase/models/external_data.py +376 -0
  100. django_cfg/apps/knowbase/serializers/__init__.py +68 -0
  101. django_cfg/apps/knowbase/serializers/archive_serializers.py +386 -0
  102. django_cfg/apps/knowbase/serializers/chat_serializers.py +137 -0
  103. django_cfg/apps/knowbase/serializers/document_serializers.py +94 -0
  104. django_cfg/apps/knowbase/serializers/external_data_serializers.py +256 -0
  105. django_cfg/apps/knowbase/serializers/public_serializers.py +74 -0
  106. django_cfg/apps/knowbase/services/__init__.py +40 -0
  107. django_cfg/apps/knowbase/services/archive/__init__.py +42 -0
  108. django_cfg/apps/knowbase/services/archive/archive_service.py +541 -0
  109. django_cfg/apps/knowbase/services/archive/chunking_service.py +791 -0
  110. django_cfg/apps/knowbase/services/archive/exceptions.py +52 -0
  111. django_cfg/apps/knowbase/services/archive/extraction_service.py +508 -0
  112. django_cfg/apps/knowbase/services/archive/vectorization_service.py +362 -0
  113. django_cfg/apps/knowbase/services/base.py +53 -0
  114. django_cfg/apps/knowbase/services/chat_service.py +239 -0
  115. django_cfg/apps/knowbase/services/document_service.py +144 -0
  116. django_cfg/apps/knowbase/services/embedding/__init__.py +43 -0
  117. django_cfg/apps/knowbase/services/embedding/async_processor.py +244 -0
  118. django_cfg/apps/knowbase/services/embedding/batch_processor.py +250 -0
  119. django_cfg/apps/knowbase/services/embedding/batch_result.py +61 -0
  120. django_cfg/apps/knowbase/services/embedding/models.py +229 -0
  121. django_cfg/apps/knowbase/services/embedding/processors.py +148 -0
  122. django_cfg/apps/knowbase/services/embedding/utils.py +176 -0
  123. django_cfg/apps/knowbase/services/prompt_builder.py +191 -0
  124. django_cfg/apps/knowbase/services/search_service.py +293 -0
  125. django_cfg/apps/knowbase/signals/__init__.py +21 -0
  126. django_cfg/apps/knowbase/signals/archive_signals.py +211 -0
  127. django_cfg/apps/knowbase/signals/chat_signals.py +37 -0
  128. django_cfg/apps/knowbase/signals/document_signals.py +143 -0
  129. django_cfg/apps/knowbase/signals/external_data_signals.py +157 -0
  130. django_cfg/apps/knowbase/tasks/__init__.py +39 -0
  131. django_cfg/apps/knowbase/tasks/archive_tasks.py +316 -0
  132. django_cfg/apps/knowbase/tasks/document_processing.py +341 -0
  133. django_cfg/apps/knowbase/tasks/external_data_tasks.py +341 -0
  134. django_cfg/apps/knowbase/tasks/maintenance.py +195 -0
  135. django_cfg/apps/knowbase/urls.py +43 -0
  136. django_cfg/apps/knowbase/utils/__init__.py +12 -0
  137. django_cfg/apps/knowbase/utils/chunk_settings.py +261 -0
  138. django_cfg/apps/knowbase/utils/text_processing.py +375 -0
  139. django_cfg/apps/knowbase/utils/validation.py +99 -0
  140. django_cfg/apps/knowbase/views/__init__.py +28 -0
  141. django_cfg/apps/knowbase/views/archive_views.py +469 -0
  142. django_cfg/apps/knowbase/views/base.py +49 -0
  143. django_cfg/apps/knowbase/views/chat_views.py +181 -0
  144. django_cfg/apps/knowbase/views/document_views.py +183 -0
  145. django_cfg/apps/knowbase/views/public_views.py +129 -0
  146. django_cfg/apps/leads/admin.py +70 -0
  147. django_cfg/apps/newsletter/admin.py +234 -0
  148. django_cfg/apps/newsletter/admin_filters.py +124 -0
  149. django_cfg/apps/support/admin.py +196 -0
  150. django_cfg/apps/support/admin_filters.py +71 -0
  151. django_cfg/apps/support/templates/support/chat/ticket_chat.html +1 -1
  152. django_cfg/apps/urls.py +5 -4
  153. django_cfg/cli/README.md +1 -1
  154. django_cfg/cli/commands/create_project.py +2 -2
  155. django_cfg/cli/commands/info.py +1 -1
  156. django_cfg/config.py +44 -0
  157. django_cfg/core/config.py +29 -82
  158. django_cfg/core/environment.py +1 -1
  159. django_cfg/core/generation.py +19 -107
  160. django_cfg/{integration.py → core/integration.py} +18 -16
  161. django_cfg/core/validation.py +1 -1
  162. django_cfg/management/__init__.py +1 -1
  163. django_cfg/management/commands/__init__.py +1 -1
  164. django_cfg/management/commands/auto_generate.py +482 -0
  165. django_cfg/management/commands/migrator.py +19 -101
  166. django_cfg/management/commands/test_email.py +1 -1
  167. django_cfg/middleware/README.md +0 -158
  168. django_cfg/middleware/__init__.py +0 -2
  169. django_cfg/middleware/user_activity.py +3 -3
  170. django_cfg/models/api.py +145 -0
  171. django_cfg/models/base.py +287 -0
  172. django_cfg/models/cache.py +4 -4
  173. django_cfg/models/constance.py +25 -88
  174. django_cfg/models/database.py +9 -9
  175. django_cfg/models/drf.py +3 -36
  176. django_cfg/models/email.py +163 -0
  177. django_cfg/models/environment.py +276 -0
  178. django_cfg/models/limits.py +1 -1
  179. django_cfg/models/logging.py +366 -0
  180. django_cfg/models/revolution.py +41 -2
  181. django_cfg/models/security.py +125 -0
  182. django_cfg/models/services.py +1 -1
  183. django_cfg/modules/__init__.py +2 -56
  184. django_cfg/modules/base.py +78 -52
  185. django_cfg/modules/django_currency/service.py +2 -2
  186. django_cfg/modules/django_email.py +2 -2
  187. django_cfg/modules/django_health.py +267 -0
  188. django_cfg/modules/django_llm/llm/client.py +91 -19
  189. django_cfg/modules/django_llm/translator/translator.py +2 -2
  190. django_cfg/modules/django_logger.py +2 -2
  191. django_cfg/modules/django_ngrok.py +2 -2
  192. django_cfg/modules/django_tasks.py +68 -3
  193. django_cfg/modules/django_telegram.py +3 -3
  194. django_cfg/modules/django_twilio/sendgrid_service.py +2 -2
  195. django_cfg/modules/django_twilio/service.py +2 -2
  196. django_cfg/modules/django_twilio/simple_service.py +2 -2
  197. django_cfg/modules/django_twilio/twilio_service.py +2 -2
  198. django_cfg/modules/django_unfold/__init__.py +69 -0
  199. django_cfg/modules/{unfold → django_unfold}/callbacks.py +23 -22
  200. django_cfg/modules/django_unfold/dashboard.py +278 -0
  201. django_cfg/modules/django_unfold/icons/README.md +145 -0
  202. django_cfg/modules/django_unfold/icons/__init__.py +12 -0
  203. django_cfg/modules/django_unfold/icons/constants.py +2851 -0
  204. django_cfg/modules/django_unfold/icons/generate_icons.py +486 -0
  205. django_cfg/modules/django_unfold/models/__init__.py +42 -0
  206. django_cfg/modules/django_unfold/models/config.py +601 -0
  207. django_cfg/modules/django_unfold/models/dashboard.py +206 -0
  208. django_cfg/modules/django_unfold/models/dropdown.py +40 -0
  209. django_cfg/modules/django_unfold/models/navigation.py +73 -0
  210. django_cfg/modules/django_unfold/models/tabs.py +25 -0
  211. django_cfg/modules/{unfold → django_unfold}/system_monitor.py +2 -2
  212. django_cfg/modules/django_unfold/utils.py +140 -0
  213. django_cfg/registry/__init__.py +23 -0
  214. django_cfg/registry/core.py +61 -0
  215. django_cfg/registry/exceptions.py +11 -0
  216. django_cfg/registry/modules.py +12 -0
  217. django_cfg/registry/services.py +26 -0
  218. django_cfg/registry/third_party.py +52 -0
  219. django_cfg/routing/__init__.py +19 -0
  220. django_cfg/routing/callbacks.py +198 -0
  221. django_cfg/routing/routers.py +48 -0
  222. django_cfg/templates/admin/layouts/dashboard_with_tabs.html +8 -9
  223. django_cfg/templatetags/__init__.py +0 -0
  224. django_cfg/templatetags/django_cfg.py +33 -0
  225. django_cfg/urls.py +33 -0
  226. django_cfg/utils/path_resolution.py +1 -1
  227. django_cfg/utils/smart_defaults.py +7 -61
  228. django_cfg/utils/toolkit.py +663 -0
  229. {django_cfg-1.1.82.dist-info → django_cfg-1.2.1.dist-info}/METADATA +83 -86
  230. django_cfg-1.2.1.dist-info/RECORD +441 -0
  231. django_cfg/archive/django_sample.zip +0 -0
  232. django_cfg/models/unfold.py +0 -271
  233. django_cfg/modules/unfold/__init__.py +0 -29
  234. django_cfg/modules/unfold/dashboard.py +0 -318
  235. django_cfg/pyproject.toml +0 -370
  236. django_cfg/routers.py +0 -83
  237. django_cfg-1.1.82.dist-info/RECORD +0 -278
  238. /django_cfg/{exceptions.py → core/exceptions.py} +0 -0
  239. /django_cfg/modules/{unfold → django_unfold}/models.py +0 -0
  240. /django_cfg/modules/{unfold → django_unfold}/tailwind.py +0 -0
  241. /django_cfg/{version_check.py → utils/version_check.py} +0 -0
  242. {django_cfg-1.1.82.dist-info → django_cfg-1.2.1.dist-info}/WHEEL +0 -0
  243. {django_cfg-1.1.82.dist-info → django_cfg-1.2.1.dist-info}/entry_points.txt +0 -0
  244. {django_cfg-1.1.82.dist-info → django_cfg-1.2.1.dist-info}/licenses/LICENSE +0 -0
@@ -0,0 +1,599 @@
1
+ """
2
+ Archive models for universal document processing.
3
+
4
+ Supports any type of compressed document collections with context-aware chunking.
5
+ """
6
+
7
+ from django.db import models
8
+ from pgvector.django import VectorField
9
+ from typing import Optional, List, Dict, Any
10
+ import hashlib
11
+ import mimetypes
12
+ from pathlib import Path
13
+ from enum import Enum
14
+
15
+ from .base import UserScopedModel, ProcessingStatus, TimestampedModel
16
+ from .document import DocumentCategory
17
+
18
+
19
class ArchiveType(models.TextChoices):
    """Archive container formats the knowledge base supports.

    Every member is a ``(stored value, display label)`` pair.
    """

    ZIP = ("zip", "ZIP")
    TAR = ("tar", "TAR")
    TAR_GZ = ("tar.gz", "TAR.GZ")
    TAR_BZ2 = ("tar.bz2", "TAR.BZ2")
25
+
26
+
27
class ContentType(models.TextChoices):
    """Coarse classification of a single item found inside an archive.

    Every member is a ``(stored value, display label)`` pair.
    """

    # PDF, DOCX, TXT, MD
    DOCUMENT = ("document", "Document")
    # Programming files
    CODE = ("code", "Code")
    # Images (for OCR)
    IMAGE = ("image", "Image")
    # JSON, CSV, XML
    DATA = ("data", "Data")
    # Nested archives
    ARCHIVE = ("archive", "Archive")
    # Unprocessable
    UNKNOWN = ("unknown", "Unknown")
35
+
36
+
37
class ChunkType(models.TextChoices):
    """Kind of content carried by one chunk of an archive item.

    Every member is a ``(stored value, display label)`` pair.
    """

    # Regular text content
    TEXT = ("text", "Text")
    # Code blocks
    CODE = ("code", "Code")
    # Document headings
    HEADING = ("heading", "Heading")
    # File metadata
    METADATA = ("metadata", "Metadata")
    # Tabular data
    TABLE = ("table", "Table")
    # Lists and enumerations
    LIST = ("list", "List")
45
+
46
+
47
class DocumentArchive(UserScopedModel):
    """Universal archive entity for any document collection.

    One row represents one uploaded archive file owned by a user. It stores
    the upload itself, descriptive metadata, the processing status and a set
    of aggregate counters (items, chunks, tokens, cost).
    """

    # Custom managers.
    # NOTE(review): the import lives in the class body rather than at module
    # top, presumably to avoid a circular import with the managers package —
    # confirm before moving it.
    from ..managers.archive import DocumentArchiveManager
    objects = DocumentArchiveManager()

    title = models.CharField(
        max_length=512,
        help_text="Archive title"
    )
    description = models.TextField(
        blank=True,
        help_text="Archive description"
    )

    # Categories relationship (reuse existing DocumentCategory)
    categories = models.ManyToManyField(
        DocumentCategory,
        blank=True,
        related_name='archives',
        help_text="Archive categories (supports multiple)"
    )

    is_public = models.BooleanField(
        default=True,
        help_text="Whether this archive is publicly accessible"
    )

    # Archive file storage
    archive_file = models.FileField(
        upload_to='archives/%Y/%m/%d/',
        help_text="Uploaded archive file"
    )

    # Archive metadata
    original_filename = models.CharField(
        max_length=255,
        help_text="Original uploaded filename"
    )
    file_size = models.PositiveIntegerField(
        default=0,
        help_text="Archive size in bytes"
    )
    archive_type = models.CharField(
        max_length=20,
        choices=ArchiveType.choices,
        help_text="Archive format"
    )
    content_hash = models.CharField(
        max_length=64,
        db_index=True,
        help_text="SHA-256 hash for duplicate detection"
    )

    # Processing status (synchronous processing)
    processing_status = models.CharField(
        max_length=20,
        choices=ProcessingStatus.choices,
        default=ProcessingStatus.PENDING,
        db_index=True
    )
    processed_at = models.DateTimeField(
        null=True,
        blank=True,
        help_text="When processing completed"
    )
    processing_error = models.TextField(
        blank=True,
        default="",
        help_text="Error message if processing failed"
    )
    processing_duration_ms = models.PositiveIntegerField(
        default=0,
        help_text="Processing time in milliseconds"
    )

    # Statistics
    total_items = models.PositiveIntegerField(
        default=0,
        help_text="Total items in archive"
    )
    processed_items = models.PositiveIntegerField(
        default=0,
        help_text="Successfully processed items"
    )
    total_chunks = models.PositiveIntegerField(
        default=0,
        help_text="Total chunks created"
    )
    vectorized_chunks = models.PositiveIntegerField(
        default=0,
        help_text="Chunks with embeddings"
    )
    total_tokens = models.PositiveIntegerField(
        default=0,
        help_text="Total tokens across all chunks"
    )
    total_cost_usd = models.FloatField(
        default=0.0,
        help_text="Total processing cost in USD"
    )

    # Metadata
    metadata = models.JSONField(
        default=dict,
        blank=True,
        null=True,
        help_text="Additional archive metadata"
    )

    class Meta:
        db_table = 'django_cfg_knowbase_document_archives'
        indexes = [
            models.Index(fields=['user', 'processing_status']),
            models.Index(fields=['content_hash']),
            models.Index(fields=['-processed_at']),
            models.Index(fields=['is_public', '-created_at']),
        ]
        constraints = [
            # A user cannot store the same archive content twice.
            models.UniqueConstraint(
                fields=['user', 'content_hash'],
                name='unique_user_archive'
            )
        ]
        verbose_name = 'Document Archive'
        verbose_name_plural = 'Document Archives'
        ordering = ['-created_at']

    def save(self, *args, **kwargs):
        """Persist the archive.

        No fields are computed here: ``content_hash`` is expected to be set
        by the service that processes the uploaded file before saving.
        """
        super().save(*args, **kwargs)

    def __str__(self) -> str:
        return f"{self.title} ({self.user.username})"

    @property
    def is_processed(self) -> bool:
        """Check if archive processing is completed."""
        return self.processing_status == ProcessingStatus.COMPLETED

    @property
    def processing_progress(self) -> float:
        """Return processed items as a percentage of total items.

        Returns ``0.0`` when the archive has no items, avoiding a division
        by zero.
        """
        if self.total_items == 0:
            return 0.0
        return (self.processed_items / self.total_items) * 100

    @property
    def vectorization_progress(self) -> float:
        """Return vectorized chunks as a percentage of total chunks.

        Returns ``0.0`` when the archive has no chunks, avoiding a division
        by zero.
        """
        if self.total_chunks == 0:
            return 0.0
        return (self.vectorized_chunks / self.total_chunks) * 100

    def get_file_tree(self) -> Dict[str, Any]:
        """Build a hierarchical file tree from the archive's items.

        Each item's ``relative_path`` is split on ``'/'``. Every segment but
        the last becomes a directory node
        ``{'type': 'directory', 'children': {...}}``; the last segment
        becomes a file leaf describing the item.

        If a path segment collides with a file leaf of the same name (for
        example items ``a`` and ``a/b`` in a crafted archive), the directory
        wins and the file leaf is dropped. Previously this raised
        ``KeyError`` or discarded the directory's children, depending on
        iteration order.

        Returns:
            Nested dict keyed by path segment.
        """
        tree: Dict[str, Any] = {}

        for item in self.items.all().order_by('relative_path'):
            # str.split always yields at least one element, so the final
            # segment (the file name) is always present.
            *directories, filename = item.relative_path.split('/')
            current = tree

            for part in directories:
                node = current.get(part)
                if node is None or node.get('type') != 'directory':
                    # Either a new directory or a file leaf occupying the
                    # name: (re)create it as a directory so we can descend.
                    node = {'type': 'directory', 'children': {}}
                    current[part] = node
                current = node['children']

            existing = current.get(filename)
            if existing is not None and existing.get('type') == 'directory':
                # Never replace a directory (and its children) with a leaf.
                continue

            current[filename] = {
                'type': 'file',
                'id': str(item.id),
                'size': item.file_size,
                'content_type': item.content_type,
                'language': item.language,
                'is_processable': item.is_processable,
                'chunks_count': item.chunks_count
            }

        return tree
230
+
231
+
232
class ArchiveItem(UserScopedModel):
    """Individual file/document within archive.

    Stores the item's location inside the archive, its extracted text and
    classification (MIME type, content type, language), plus per-item
    processing counters.
    """

    # Custom managers.
    # NOTE(review): the import lives in the class body rather than at module
    # top, presumably to avoid a circular import with the managers package —
    # confirm before moving it.
    from ..managers.archive import ArchiveItemManager
    objects = ArchiveItemManager()

    # Lookup tables used by the detect_* helpers. Kept at class level so they
    # are built once instead of on every call.
    _LANGUAGE_BY_EXTENSION = {
        '.py': 'python',
        '.js': 'javascript',
        '.ts': 'typescript',
        '.jsx': 'react',
        '.tsx': 'react-typescript',
        '.java': 'java',
        '.go': 'golang',
        '.rs': 'rust',
        '.cpp': 'cpp',
        '.c': 'c',
        '.php': 'php',
        '.rb': 'ruby',
        '.md': 'markdown',
        '.yml': 'yaml',
        '.yaml': 'yaml',
        '.json': 'json',
        '.xml': 'xml',
        '.html': 'html',
        '.css': 'css',
        '.scss': 'scss',
        '.sql': 'sql',
        '.sh': 'bash',
        '.dockerfile': 'dockerfile',
        '.tf': 'terraform',
    }
    # Extension-less files recognised by their (lower-cased) name.
    _LANGUAGE_FILENAMES = ('dockerfile', 'makefile')
    # Checked in order; the first group containing the extension wins.
    _EXTENSION_GROUPS = (
        (ContentType.CODE, frozenset({
            '.py', '.js', '.ts', '.jsx', '.tsx', '.java', '.go', '.rs',
            '.cpp', '.c', '.h', '.hpp', '.php', '.rb', '.cs', '.swift',
            '.kt', '.scala', '.clj', '.hs', '.ml', '.fs', '.elm',
            # Source formats that detect_programming_language() already
            # recognises; without them such files were tagged with a language
            # yet classified as UNKNOWN.
            '.html', '.css', '.scss', '.sql', '.sh', '.dockerfile', '.tf',
        })),
        (ContentType.DOCUMENT, frozenset({
            '.md', '.txt', '.rst', '.adoc', '.pdf', '.docx', '.doc',
        })),
        (ContentType.DATA, frozenset({
            '.json', '.csv', '.xml', '.yml', '.yaml', '.toml', '.ini',
        })),
        (ContentType.IMAGE, frozenset({
            '.jpg', '.jpeg', '.png', '.gif', '.bmp', '.svg', '.webp',
        })),
        (ContentType.ARCHIVE, frozenset({
            '.zip', '.tar', '.gz', '.bz2', '.7z', '.rar',
        })),
    )

    archive = models.ForeignKey(
        DocumentArchive,
        on_delete=models.CASCADE,
        related_name='items',
        help_text="Parent archive"
    )

    # File metadata
    relative_path = models.CharField(
        max_length=1024,
        help_text="Path within archive"
    )
    item_name = models.CharField(
        max_length=255,
        help_text="Item name"
    )
    item_type = models.CharField(
        max_length=100,
        help_text="MIME type"
    )
    content_type = models.CharField(
        max_length=20,
        choices=ContentType.choices,
        default=ContentType.UNKNOWN,
        help_text="Content classification"
    )
    file_size = models.PositiveIntegerField(
        default=0,
        help_text="Item size in bytes"
    )
    content_hash = models.CharField(
        max_length=64,
        help_text="SHA-256 hash of item content"
    )

    # Content processing
    raw_content = models.TextField(
        blank=True,
        help_text="Extracted text content"
    )
    is_processable = models.BooleanField(
        default=False,
        help_text="Whether item can be processed for chunks"
    )

    # Metadata for context
    language = models.CharField(
        max_length=50,
        blank=True,
        help_text="Programming language or document language"
    )
    encoding = models.CharField(
        max_length=50,
        default='utf-8',
        help_text="Character encoding"
    )

    # Processing results
    chunks_count = models.PositiveIntegerField(
        default=0,
        help_text="Number of chunks created"
    )
    total_tokens = models.PositiveIntegerField(
        default=0,
        help_text="Total tokens in all chunks"
    )
    processing_cost = models.FloatField(
        default=0.0,
        help_text="Processing cost for this item"
    )

    # Additional metadata
    metadata = models.JSONField(
        default=dict,
        blank=True,
        null=True,
        help_text="Item-specific metadata"
    )

    class Meta:
        db_table = 'django_cfg_knowbase_archive_items'
        indexes = [
            models.Index(fields=['user']),
            models.Index(fields=['archive', 'relative_path']),
            models.Index(fields=['content_type', 'is_processable']),
            models.Index(fields=['language']),
        ]
        constraints = [
            # A path may appear only once inside a given archive.
            models.UniqueConstraint(
                fields=['archive', 'relative_path'],
                name='unique_archive_item_path'
            )
        ]
        ordering = ['archive', 'relative_path']
        verbose_name = 'Archive Item'
        verbose_name_plural = 'Archive Items'

    def save(self, *args, **kwargs):
        """Fill in derived fields that are still empty, then persist.

        Only missing values are computed; anything the caller already set is
        left untouched:

        * ``content_hash`` - SHA-256 of ``raw_content`` (when there is content).
        * ``item_type`` - MIME type guessed from ``item_name``, falling back
          to ``application/octet-stream``.
        * ``language`` - see :meth:`detect_programming_language`.
        * ``content_type`` - see :meth:`detect_content_type`; re-detected
          while it is empty or ``UNKNOWN``.
        """
        if self.raw_content and not self.content_hash:
            self.content_hash = hashlib.sha256(self.raw_content.encode()).hexdigest()

        # Detect item type and programming language
        if not self.item_type:
            guessed_type, _ = mimetypes.guess_type(self.item_name)
            self.item_type = guessed_type or 'application/octet-stream'

        if not self.language:
            self.language = self.detect_programming_language()

        if not self.content_type or self.content_type == ContentType.UNKNOWN:
            self.content_type = self.detect_content_type()

        super().save(*args, **kwargs)

    def __str__(self) -> str:
        return f"{self.relative_path} in {self.archive.title}"

    def detect_programming_language(self) -> str:
        """Detect the language from ``item_name``.

        Files named ``Dockerfile`` or ``Makefile`` (case-insensitive) are
        recognised by name; everything else is looked up by its lower-cased
        extension.

        Returns:
            The language identifier, or ``''`` when nothing matches.
        """
        file_path = Path(self.item_name)

        # Special cases: well-known extension-less files.
        lowered_name = file_path.name.lower()
        if lowered_name in self._LANGUAGE_FILENAMES:
            return lowered_name

        return self._LANGUAGE_BY_EXTENSION.get(file_path.suffix.lower(), '')

    def detect_content_type(self) -> str:
        """Classify the item from ``item_name``.

        Detection uses the file name only (the MIME type in ``item_type`` is
        not consulted). ``Dockerfile``/``Makefile`` and every source-code
        extension known to :meth:`detect_programming_language` are reported
        as code, so the two detectors agree.

        Returns:
            A :class:`ContentType` value; ``ContentType.UNKNOWN`` when the
            name matches no known group.
        """
        file_path = Path(self.item_name)

        if file_path.name.lower() in self._LANGUAGE_FILENAMES:
            return ContentType.CODE

        extension = file_path.suffix.lower()
        for content_type, extensions in self._EXTENSION_GROUPS:
            if extension in extensions:
                return content_type

        return ContentType.UNKNOWN

    @property
    def file_extension(self) -> str:
        """Lower-cased extension of ``item_name``, including the leading dot."""
        return Path(self.item_name).suffix.lower()

    @property
    def is_code_file(self) -> bool:
        """Check if item is a code file."""
        return self.content_type == ContentType.CODE

    @property
    def is_document_file(self) -> bool:
        """Check if item is a document file."""
        return self.content_type == ContentType.DOCUMENT
455
+
456
+
457
class ArchiveItemChunk(UserScopedModel):
    """Context-aware chunk with rich parent references.

    A chunk is one slice of an :class:`ArchiveItem`'s text. It keeps direct
    references to both the item and the archive, optional vector embedding,
    and bookkeeping about how the embedding was produced.
    """

    # Custom managers.
    # NOTE(review): the import lives in the class body rather than at module
    # top, presumably to avoid a circular import with the managers package —
    # confirm before moving it.
    from ..managers.archive import ArchiveItemChunkManager
    objects = ArchiveItemChunkManager()

    # Parent references
    archive = models.ForeignKey(
        DocumentArchive,
        on_delete=models.CASCADE,
        related_name='chunks',
        help_text="Parent archive"
    )
    item = models.ForeignKey(
        ArchiveItem,
        on_delete=models.CASCADE,
        related_name='chunks',
        help_text="Parent item"
    )

    # Chunk content
    content = models.TextField(
        help_text="Chunk text content"
    )
    chunk_index = models.PositiveIntegerField(
        help_text="Sequential chunk number within item"
    )
    chunk_type = models.CharField(
        max_length=20,
        choices=ChunkType.choices,
        default=ChunkType.TEXT,
        help_text="Type of content in chunk"
    )

    # Context preservation - rich metadata for AI understanding
    context_metadata = models.JSONField(
        default=dict,
        help_text="Rich context information for AI processing"
    )

    # Vector embedding (1536 dimensions for OpenAI text-embedding-ada-002)
    embedding = VectorField(
        dimensions=1536,
        null=True,
        help_text="Vector embedding for semantic search"
    )

    # Chunk statistics
    token_count = models.PositiveIntegerField(
        default=0,
        help_text="Number of tokens in chunk"
    )
    character_count = models.PositiveIntegerField(
        default=0,
        help_text="Number of characters in chunk"
    )

    # Processing metadata
    embedding_model = models.CharField(
        max_length=100,
        default="text-embedding-ada-002",
        help_text="Model used for embedding generation"
    )
    embedding_cost = models.FloatField(
        default=0.0,
        help_text="Cost in USD for embedding generation"
    )

    class Meta:
        db_table = 'django_cfg_knowbase_archive_item_chunks'
        indexes = [
            models.Index(fields=['user']),
            models.Index(fields=['archive']),
            models.Index(fields=['item', 'chunk_index']),
            models.Index(fields=['chunk_type']),
        ]
        constraints = [
            # Chunk numbers are unique within their item.
            models.UniqueConstraint(
                fields=['item', 'chunk_index'],
                name='unique_item_chunk'
            )
        ]
        ordering = ['item', 'chunk_index']
        verbose_name = 'Archive Item Chunk'
        verbose_name_plural = 'Archive Item Chunks'

    def save(self, *args, **kwargs):
        """Refresh ``character_count`` from ``content``, then persist.

        The count is recomputed on every save so it cannot go stale when the
        content of an existing chunk is edited (it used to be filled in only
        while it was still zero).
        """
        self.character_count = len(self.content) if self.content else 0

        super().save(*args, **kwargs)

    def __str__(self) -> str:
        return f"Chunk {self.chunk_index} of {self.item.relative_path}"

    @classmethod
    def semantic_search(
        cls,
        user,
        query_embedding: List[float],
        limit: int = 5,
        similarity_threshold: float = 0.7,
        content_types: Optional[List[str]] = None,
        languages: Optional[List[str]] = None
    ):
        """Perform semantic search using pgvector with context filtering.

        Args:
            user: Owner whose chunks are searched.
            query_embedding: Embedding vector of the query.
            limit: Maximum number of chunks to return.
            similarity_threshold: Minimum similarity (``1 - cosine distance``)
                a chunk must reach to be included.
            content_types: If given (and non-empty), keep only chunks whose
                parent item has one of these content types.
            languages: If given (and non-empty), keep only chunks whose
                parent item has one of these languages.

        Returns:
            A queryset slice of chunks annotated with ``similarity``, most
            similar first. Chunks without an embedding are never returned.
        """
        from pgvector.django import CosineDistance

        queryset = cls.objects.filter(
            user=user,
            embedding__isnull=False
        )

        # Apply content type filter
        if content_types:
            queryset = queryset.filter(
                item__content_type__in=content_types
            )

        # Apply language filter
        if languages:
            queryset = queryset.filter(
                item__language__in=languages
            )

        return queryset.annotate(
            similarity=1 - CosineDistance('embedding', query_embedding)
        ).filter(
            similarity__gte=similarity_threshold
        ).order_by('-similarity')[:limit]

    def get_context_summary(self) -> Dict[str, Any]:
        """Get summary of chunk context for display.

        ``chunk_position`` is rendered one-based, as
        ``"<index + 1>/<item.chunks_count>"``.
        """
        return {
            'archive_title': self.archive.title,
            'item_path': self.item.relative_path,
            'item_type': self.item.content_type,
            'language': self.item.language,
            'chunk_position': f"{self.chunk_index + 1}/{self.item.chunks_count}",
            'chunk_type': self.chunk_type,
        }
@@ -0,0 +1,58 @@
1
+ """
2
+ Base models for knowledge base application.
3
+ """
4
+
5
+ from django.db import models
6
+ from django.contrib.auth import get_user_model
7
+ from django.conf import settings
8
+ from enum import Enum
9
+ import uuid
10
+
11
+
12
class ProcessingStatus(models.TextChoices):
    """Lifecycle states of a processing job.

    Every member is a ``(stored value, display label)`` pair.
    """

    PENDING = ("pending", "Pending")
    PROCESSING = ("processing", "Processing")
    COMPLETED = ("completed", "Completed")
    FAILED = ("failed", "Failed")
    CANCELLED = ("cancelled", "Cancelled")
19
+
20
+
21
class TimestampedModel(models.Model):
    """Abstract base providing a UUID primary key and audit timestamps."""

    # Random UUID assigned on instantiation; not editable through forms.
    id = models.UUIDField(primary_key=True, editable=False, default=uuid.uuid4)
    # Set once when the row is first created.
    created_at = models.DateTimeField(db_index=True, auto_now_add=True)
    # Refreshed on every save.
    updated_at = models.DateTimeField(auto_now=True)

    class Meta:
        abstract = True
        indexes = [models.Index(fields=['-created_at'])]

    @property
    def short_uuid(self) -> str:
        """First six characters of the primary key, for compact display."""
        full_id = str(self.id)
        return full_id[:6]
42
+
43
+
44
class UserScopedModel(TimestampedModel):
    """Abstract base that ties every record to an owning user."""

    # Deleting the user deletes the records they own.
    user = models.ForeignKey(
        settings.AUTH_USER_MODEL,
        db_index=True,
        help_text="Owner of this record",
        on_delete=models.CASCADE,
    )

    class Meta:
        abstract = True
        indexes = [models.Index(fields=['user', '-created_at'])]