django-cfg 1.1.82__py3-none-any.whl → 1.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- django_cfg/__init__.py +20 -448
- django_cfg/apps/accounts/README.md +3 -3
- django_cfg/apps/accounts/admin/__init__.py +0 -2
- django_cfg/apps/accounts/admin/activity.py +2 -9
- django_cfg/apps/accounts/admin/filters.py +0 -42
- django_cfg/apps/accounts/admin/inlines.py +8 -8
- django_cfg/apps/accounts/admin/otp.py +5 -5
- django_cfg/apps/accounts/admin/registration_source.py +1 -8
- django_cfg/apps/accounts/admin/user.py +12 -20
- django_cfg/apps/accounts/managers/user_manager.py +2 -129
- django_cfg/apps/accounts/migrations/0006_remove_twilioresponse_otp_secret_and_more.py +46 -0
- django_cfg/apps/accounts/models.py +3 -123
- django_cfg/apps/accounts/serializers/otp.py +40 -44
- django_cfg/apps/accounts/serializers/profile.py +0 -2
- django_cfg/apps/accounts/services/otp_service.py +98 -186
- django_cfg/apps/accounts/signals.py +25 -15
- django_cfg/apps/accounts/utils/auth_email_service.py +84 -0
- django_cfg/apps/accounts/views/otp.py +35 -36
- django_cfg/apps/agents/README.md +129 -0
- django_cfg/apps/agents/__init__.py +68 -0
- django_cfg/apps/agents/admin/__init__.py +17 -0
- django_cfg/apps/agents/admin/execution_admin.py +460 -0
- django_cfg/apps/agents/admin/registry_admin.py +360 -0
- django_cfg/apps/agents/admin/toolsets_admin.py +482 -0
- django_cfg/apps/agents/apps.py +29 -0
- django_cfg/apps/agents/core/__init__.py +20 -0
- django_cfg/apps/agents/core/agent.py +281 -0
- django_cfg/apps/agents/core/dependencies.py +154 -0
- django_cfg/apps/agents/core/exceptions.py +66 -0
- django_cfg/apps/agents/core/models.py +106 -0
- django_cfg/apps/agents/core/orchestrator.py +391 -0
- django_cfg/apps/agents/examples/__init__.py +3 -0
- django_cfg/apps/agents/examples/simple_example.py +161 -0
- django_cfg/apps/agents/integration/__init__.py +14 -0
- django_cfg/apps/agents/integration/middleware.py +80 -0
- django_cfg/apps/agents/integration/registry.py +345 -0
- django_cfg/apps/agents/integration/signals.py +50 -0
- django_cfg/apps/agents/management/__init__.py +3 -0
- django_cfg/apps/agents/management/commands/__init__.py +3 -0
- django_cfg/apps/agents/management/commands/create_agent.py +365 -0
- django_cfg/apps/agents/management/commands/orchestrator_status.py +191 -0
- django_cfg/apps/agents/managers/__init__.py +23 -0
- django_cfg/apps/agents/managers/execution.py +236 -0
- django_cfg/apps/agents/managers/registry.py +254 -0
- django_cfg/apps/agents/managers/toolsets.py +496 -0
- django_cfg/apps/agents/migrations/0001_initial.py +286 -0
- django_cfg/apps/agents/migrations/__init__.py +5 -0
- django_cfg/apps/agents/models/__init__.py +15 -0
- django_cfg/apps/agents/models/execution.py +215 -0
- django_cfg/apps/agents/models/registry.py +220 -0
- django_cfg/apps/agents/models/toolsets.py +305 -0
- django_cfg/apps/agents/patterns/__init__.py +24 -0
- django_cfg/apps/agents/patterns/content_agents.py +234 -0
- django_cfg/apps/agents/toolsets/__init__.py +15 -0
- django_cfg/apps/agents/toolsets/cache_toolset.py +285 -0
- django_cfg/apps/agents/toolsets/django_toolset.py +220 -0
- django_cfg/apps/agents/toolsets/file_toolset.py +324 -0
- django_cfg/apps/agents/toolsets/orm_toolset.py +319 -0
- django_cfg/apps/agents/urls.py +46 -0
- django_cfg/apps/knowbase/README.md +150 -0
- django_cfg/apps/knowbase/__init__.py +27 -0
- django_cfg/apps/knowbase/admin/__init__.py +23 -0
- django_cfg/apps/knowbase/admin/archive_admin.py +857 -0
- django_cfg/apps/knowbase/admin/chat_admin.py +386 -0
- django_cfg/apps/knowbase/admin/document_admin.py +650 -0
- django_cfg/apps/knowbase/admin/external_data_admin.py +685 -0
- django_cfg/apps/knowbase/apps.py +81 -0
- django_cfg/apps/knowbase/config/README.md +176 -0
- django_cfg/apps/knowbase/config/__init__.py +51 -0
- django_cfg/apps/knowbase/config/constance_fields.py +186 -0
- django_cfg/apps/knowbase/config/constance_settings.py +200 -0
- django_cfg/apps/knowbase/config/settings.py +444 -0
- django_cfg/apps/knowbase/examples/__init__.py +3 -0
- django_cfg/apps/knowbase/examples/external_data_usage.py +191 -0
- django_cfg/apps/knowbase/management/__init__.py +0 -0
- django_cfg/apps/knowbase/management/commands/__init__.py +0 -0
- django_cfg/apps/knowbase/management/commands/knowbase_stats.py +158 -0
- django_cfg/apps/knowbase/management/commands/setup_knowbase.py +59 -0
- django_cfg/apps/knowbase/managers/__init__.py +22 -0
- django_cfg/apps/knowbase/managers/archive.py +426 -0
- django_cfg/apps/knowbase/managers/base.py +32 -0
- django_cfg/apps/knowbase/managers/chat.py +141 -0
- django_cfg/apps/knowbase/managers/document.py +203 -0
- django_cfg/apps/knowbase/managers/external_data.py +471 -0
- django_cfg/apps/knowbase/migrations/0001_initial.py +427 -0
- django_cfg/apps/knowbase/migrations/0002_archiveitem_archiveitemchunk_documentarchive_and_more.py +434 -0
- django_cfg/apps/knowbase/migrations/__init__.py +5 -0
- django_cfg/apps/knowbase/mixins/__init__.py +15 -0
- django_cfg/apps/knowbase/mixins/config.py +108 -0
- django_cfg/apps/knowbase/mixins/creator.py +81 -0
- django_cfg/apps/knowbase/mixins/examples/vehicle_model_example.py +199 -0
- django_cfg/apps/knowbase/mixins/external_data_mixin.py +813 -0
- django_cfg/apps/knowbase/mixins/service.py +362 -0
- django_cfg/apps/knowbase/models/__init__.py +41 -0
- django_cfg/apps/knowbase/models/archive.py +599 -0
- django_cfg/apps/knowbase/models/base.py +58 -0
- django_cfg/apps/knowbase/models/chat.py +157 -0
- django_cfg/apps/knowbase/models/document.py +267 -0
- django_cfg/apps/knowbase/models/external_data.py +376 -0
- django_cfg/apps/knowbase/serializers/__init__.py +68 -0
- django_cfg/apps/knowbase/serializers/archive_serializers.py +386 -0
- django_cfg/apps/knowbase/serializers/chat_serializers.py +137 -0
- django_cfg/apps/knowbase/serializers/document_serializers.py +94 -0
- django_cfg/apps/knowbase/serializers/external_data_serializers.py +256 -0
- django_cfg/apps/knowbase/serializers/public_serializers.py +74 -0
- django_cfg/apps/knowbase/services/__init__.py +40 -0
- django_cfg/apps/knowbase/services/archive/__init__.py +42 -0
- django_cfg/apps/knowbase/services/archive/archive_service.py +541 -0
- django_cfg/apps/knowbase/services/archive/chunking_service.py +791 -0
- django_cfg/apps/knowbase/services/archive/exceptions.py +52 -0
- django_cfg/apps/knowbase/services/archive/extraction_service.py +508 -0
- django_cfg/apps/knowbase/services/archive/vectorization_service.py +362 -0
- django_cfg/apps/knowbase/services/base.py +53 -0
- django_cfg/apps/knowbase/services/chat_service.py +239 -0
- django_cfg/apps/knowbase/services/document_service.py +144 -0
- django_cfg/apps/knowbase/services/embedding/__init__.py +43 -0
- django_cfg/apps/knowbase/services/embedding/async_processor.py +244 -0
- django_cfg/apps/knowbase/services/embedding/batch_processor.py +250 -0
- django_cfg/apps/knowbase/services/embedding/batch_result.py +61 -0
- django_cfg/apps/knowbase/services/embedding/models.py +229 -0
- django_cfg/apps/knowbase/services/embedding/processors.py +148 -0
- django_cfg/apps/knowbase/services/embedding/utils.py +176 -0
- django_cfg/apps/knowbase/services/prompt_builder.py +191 -0
- django_cfg/apps/knowbase/services/search_service.py +293 -0
- django_cfg/apps/knowbase/signals/__init__.py +21 -0
- django_cfg/apps/knowbase/signals/archive_signals.py +211 -0
- django_cfg/apps/knowbase/signals/chat_signals.py +37 -0
- django_cfg/apps/knowbase/signals/document_signals.py +143 -0
- django_cfg/apps/knowbase/signals/external_data_signals.py +157 -0
- django_cfg/apps/knowbase/tasks/__init__.py +39 -0
- django_cfg/apps/knowbase/tasks/archive_tasks.py +316 -0
- django_cfg/apps/knowbase/tasks/document_processing.py +341 -0
- django_cfg/apps/knowbase/tasks/external_data_tasks.py +341 -0
- django_cfg/apps/knowbase/tasks/maintenance.py +195 -0
- django_cfg/apps/knowbase/urls.py +43 -0
- django_cfg/apps/knowbase/utils/__init__.py +12 -0
- django_cfg/apps/knowbase/utils/chunk_settings.py +261 -0
- django_cfg/apps/knowbase/utils/text_processing.py +375 -0
- django_cfg/apps/knowbase/utils/validation.py +99 -0
- django_cfg/apps/knowbase/views/__init__.py +28 -0
- django_cfg/apps/knowbase/views/archive_views.py +469 -0
- django_cfg/apps/knowbase/views/base.py +49 -0
- django_cfg/apps/knowbase/views/chat_views.py +181 -0
- django_cfg/apps/knowbase/views/document_views.py +183 -0
- django_cfg/apps/knowbase/views/public_views.py +129 -0
- django_cfg/apps/leads/admin.py +70 -0
- django_cfg/apps/newsletter/admin.py +234 -0
- django_cfg/apps/newsletter/admin_filters.py +124 -0
- django_cfg/apps/support/admin.py +196 -0
- django_cfg/apps/support/admin_filters.py +71 -0
- django_cfg/apps/support/templates/support/chat/ticket_chat.html +1 -1
- django_cfg/apps/urls.py +5 -4
- django_cfg/cli/README.md +1 -1
- django_cfg/cli/commands/create_project.py +2 -2
- django_cfg/cli/commands/info.py +1 -1
- django_cfg/config.py +44 -0
- django_cfg/core/config.py +29 -82
- django_cfg/core/environment.py +1 -1
- django_cfg/core/generation.py +19 -107
- django_cfg/{integration.py → core/integration.py} +18 -16
- django_cfg/core/validation.py +1 -1
- django_cfg/management/__init__.py +1 -1
- django_cfg/management/commands/__init__.py +1 -1
- django_cfg/management/commands/auto_generate.py +482 -0
- django_cfg/management/commands/migrator.py +19 -101
- django_cfg/management/commands/test_email.py +1 -1
- django_cfg/middleware/README.md +0 -158
- django_cfg/middleware/__init__.py +0 -2
- django_cfg/middleware/user_activity.py +3 -3
- django_cfg/models/api.py +145 -0
- django_cfg/models/base.py +287 -0
- django_cfg/models/cache.py +4 -4
- django_cfg/models/constance.py +25 -88
- django_cfg/models/database.py +9 -9
- django_cfg/models/drf.py +3 -36
- django_cfg/models/email.py +163 -0
- django_cfg/models/environment.py +276 -0
- django_cfg/models/limits.py +1 -1
- django_cfg/models/logging.py +366 -0
- django_cfg/models/revolution.py +41 -2
- django_cfg/models/security.py +125 -0
- django_cfg/models/services.py +1 -1
- django_cfg/modules/__init__.py +2 -56
- django_cfg/modules/base.py +78 -52
- django_cfg/modules/django_currency/service.py +2 -2
- django_cfg/modules/django_email.py +2 -2
- django_cfg/modules/django_health.py +267 -0
- django_cfg/modules/django_llm/llm/client.py +79 -17
- django_cfg/modules/django_llm/translator/translator.py +2 -2
- django_cfg/modules/django_logger.py +2 -2
- django_cfg/modules/django_ngrok.py +2 -2
- django_cfg/modules/django_tasks.py +68 -3
- django_cfg/modules/django_telegram.py +3 -3
- django_cfg/modules/django_twilio/sendgrid_service.py +2 -2
- django_cfg/modules/django_twilio/service.py +2 -2
- django_cfg/modules/django_twilio/simple_service.py +2 -2
- django_cfg/modules/django_twilio/twilio_service.py +2 -2
- django_cfg/modules/django_unfold/__init__.py +69 -0
- django_cfg/modules/{unfold → django_unfold}/callbacks.py +23 -22
- django_cfg/modules/django_unfold/dashboard.py +278 -0
- django_cfg/modules/django_unfold/icons/README.md +145 -0
- django_cfg/modules/django_unfold/icons/__init__.py +12 -0
- django_cfg/modules/django_unfold/icons/constants.py +2851 -0
- django_cfg/modules/django_unfold/icons/generate_icons.py +486 -0
- django_cfg/modules/django_unfold/models/__init__.py +42 -0
- django_cfg/modules/django_unfold/models/config.py +601 -0
- django_cfg/modules/django_unfold/models/dashboard.py +206 -0
- django_cfg/modules/django_unfold/models/dropdown.py +40 -0
- django_cfg/modules/django_unfold/models/navigation.py +73 -0
- django_cfg/modules/django_unfold/models/tabs.py +25 -0
- django_cfg/modules/{unfold → django_unfold}/system_monitor.py +2 -2
- django_cfg/modules/django_unfold/utils.py +140 -0
- django_cfg/registry/__init__.py +23 -0
- django_cfg/registry/core.py +61 -0
- django_cfg/registry/exceptions.py +11 -0
- django_cfg/registry/modules.py +12 -0
- django_cfg/registry/services.py +26 -0
- django_cfg/registry/third_party.py +52 -0
- django_cfg/routing/__init__.py +19 -0
- django_cfg/routing/callbacks.py +198 -0
- django_cfg/routing/routers.py +48 -0
- django_cfg/templates/admin/layouts/dashboard_with_tabs.html +8 -9
- django_cfg/templatetags/__init__.py +0 -0
- django_cfg/templatetags/django_cfg.py +33 -0
- django_cfg/urls.py +33 -0
- django_cfg/utils/path_resolution.py +1 -1
- django_cfg/utils/smart_defaults.py +7 -61
- django_cfg/utils/toolkit.py +663 -0
- {django_cfg-1.1.82.dist-info → django_cfg-1.2.0.dist-info}/METADATA +83 -86
- django_cfg-1.2.0.dist-info/RECORD +441 -0
- django_cfg/archive/django_sample.zip +0 -0
- django_cfg/models/unfold.py +0 -271
- django_cfg/modules/unfold/__init__.py +0 -29
- django_cfg/modules/unfold/dashboard.py +0 -318
- django_cfg/pyproject.toml +0 -370
- django_cfg/routers.py +0 -83
- django_cfg-1.1.82.dist-info/RECORD +0 -278
- /django_cfg/{exceptions.py → core/exceptions.py} +0 -0
- /django_cfg/modules/{unfold → django_unfold}/models.py +0 -0
- /django_cfg/modules/{unfold → django_unfold}/tailwind.py +0 -0
- /django_cfg/{version_check.py → utils/version_check.py} +0 -0
- {django_cfg-1.1.82.dist-info → django_cfg-1.2.0.dist-info}/WHEEL +0 -0
- {django_cfg-1.1.82.dist-info → django_cfg-1.2.0.dist-info}/entry_points.txt +0 -0
- {django_cfg-1.1.82.dist-info → django_cfg-1.2.0.dist-info}/licenses/LICENSE +0 -0
@@ -0,0 +1,599 @@
|
|
1
|
+
"""
|
2
|
+
Archive models for universal document processing.
|
3
|
+
|
4
|
+
Supports any type of compressed document collections with context-aware chunking.
|
5
|
+
"""
|
6
|
+
|
7
|
+
from django.db import models
|
8
|
+
from pgvector.django import VectorField
|
9
|
+
from typing import Optional, List, Dict, Any
|
10
|
+
import hashlib
|
11
|
+
import mimetypes
|
12
|
+
from pathlib import Path
|
13
|
+
from enum import Enum
|
14
|
+
|
15
|
+
from .base import UserScopedModel, ProcessingStatus, TimestampedModel
|
16
|
+
from .document import DocumentCategory
|
17
|
+
|
18
|
+
|
19
|
+
class ArchiveType(models.TextChoices):
    """Archive container formats usable as ``archive_type`` choices.

    Every member is a ``(stored value, display label)`` pair.
    """

    # Plain ZIP container.
    ZIP = "zip", "ZIP"

    # TAR container: uncompressed, gzip-compressed, bzip2-compressed.
    TAR = "tar", "TAR"
    TAR_GZ = "tar.gz", "TAR.GZ"
    TAR_BZ2 = "tar.bz2", "TAR.BZ2"
|
25
|
+
|
26
|
+
|
27
|
+
class ContentType(models.TextChoices):
    """Coarse classification of a single item stored inside an archive.

    Every member is a ``(stored value, display label)`` pair.
    """

    # PDF, DOCX, TXT, MD
    DOCUMENT = "document", "Document"

    # Programming files
    CODE = "code", "Code"

    # Images (for OCR)
    IMAGE = "image", "Image"

    # JSON, CSV, XML
    DATA = "data", "Data"

    # Nested archives
    ARCHIVE = "archive", "Archive"

    # Unprocessable
    UNKNOWN = "unknown", "Unknown"
|
35
|
+
|
36
|
+
|
37
|
+
class ChunkType(models.TextChoices):
    """Kind of content held by one chunk of an archive item.

    Every member is a ``(stored value, display label)`` pair.
    """

    # Regular text content
    TEXT = "text", "Text"

    # Code blocks
    CODE = "code", "Code"

    # Document headings
    HEADING = "heading", "Heading"

    # File metadata
    METADATA = "metadata", "Metadata"

    # Tabular data
    TABLE = "table", "Table"

    # Lists and enumerations
    LIST = "list", "List"
|
45
|
+
|
46
|
+
|
47
|
+
class DocumentArchive(UserScopedModel):
    """Uploaded archive file together with its processing state and statistics.

    Files extracted from the archive are reachable through the ``items``
    reverse relation (declared on ``ArchiveItem``); their chunks through the
    ``chunks`` reverse relation (declared on ``ArchiveItemChunk``).
    """

    # Custom managers
    # The manager is imported inside the class body rather than at module top.
    # NOTE(review): presumably this avoids a circular import with the
    # managers package — confirm before moving it.
    from ..managers.archive import DocumentArchiveManager
    objects = DocumentArchiveManager()

    title = models.CharField(
        max_length=512,
        help_text="Archive title"
    )
    description = models.TextField(
        blank=True,
        help_text="Archive description"
    )

    # Categories relationship (reuse existing DocumentCategory)
    categories = models.ManyToManyField(
        DocumentCategory,
        blank=True,
        related_name='archives',
        help_text="Archive categories (supports multiple)"
    )

    is_public = models.BooleanField(
        default=True,
        help_text="Whether this archive is publicly accessible"
    )

    # Archive file storage
    archive_file = models.FileField(
        upload_to='archives/%Y/%m/%d/',
        help_text="Uploaded archive file"
    )

    # Archive metadata
    original_filename = models.CharField(
        max_length=255,
        help_text="Original uploaded filename"
    )
    file_size = models.PositiveIntegerField(
        default=0,
        help_text="Archive size in bytes"
    )
    archive_type = models.CharField(
        max_length=20,
        choices=ArchiveType.choices,
        help_text="Archive format"
    )
    content_hash = models.CharField(
        max_length=64,
        db_index=True,
        help_text="SHA-256 hash for duplicate detection"
    )

    # Processing status (synchronous processing)
    processing_status = models.CharField(
        max_length=20,
        choices=ProcessingStatus.choices,
        default=ProcessingStatus.PENDING,
        db_index=True
    )
    processed_at = models.DateTimeField(
        null=True,
        blank=True,
        help_text="When processing completed"
    )
    processing_error = models.TextField(
        blank=True,
        default="",
        help_text="Error message if processing failed"
    )
    processing_duration_ms = models.PositiveIntegerField(
        default=0,
        help_text="Processing time in milliseconds"
    )

    # Statistics
    total_items = models.PositiveIntegerField(
        default=0,
        help_text="Total items in archive"
    )
    processed_items = models.PositiveIntegerField(
        default=0,
        help_text="Successfully processed items"
    )
    total_chunks = models.PositiveIntegerField(
        default=0,
        help_text="Total chunks created"
    )
    vectorized_chunks = models.PositiveIntegerField(
        default=0,
        help_text="Chunks with embeddings"
    )
    total_tokens = models.PositiveIntegerField(
        default=0,
        help_text="Total tokens across all chunks"
    )
    total_cost_usd = models.FloatField(
        default=0.0,
        help_text="Total processing cost in USD"
    )

    # Metadata
    metadata = models.JSONField(
        default=dict,
        blank=True,
        null=True,
        help_text="Additional archive metadata"
    )

    class Meta:
        db_table = 'django_cfg_knowbase_document_archives'
        indexes = [
            models.Index(fields=['user', 'processing_status']),
            models.Index(fields=['content_hash']),
            models.Index(fields=['-processed_at']),
            models.Index(fields=['is_public', '-created_at']),
        ]
        constraints = [
            # The same content may be stored only once per user.
            models.UniqueConstraint(
                fields=['user', 'content_hash'],
                name='unique_user_archive'
            )
        ]
        verbose_name = 'Document Archive'
        verbose_name_plural = 'Document Archives'
        ordering = ['-created_at']

    def save(self, *args, **kwargs):
        """Persist the archive without computing any field.

        ``content_hash`` is NOT generated here; it is expected to be set by
        the service that processes the uploaded file before saving.
        """
        super().save(*args, **kwargs)

    def __str__(self) -> str:
        return f"{self.title} ({self.user.username})"

    @property
    def is_processed(self) -> bool:
        """Return True when ``processing_status`` is ``COMPLETED``."""
        return self.processing_status == ProcessingStatus.COMPLETED

    @property
    def processing_progress(self) -> float:
        """Return processed items as a percentage of total items.

        Returns 0.0 when ``total_items`` is zero, avoiding a division by zero.
        """
        if self.total_items == 0:
            return 0.0
        return (self.processed_items / self.total_items) * 100

    @property
    def vectorization_progress(self) -> float:
        """Return vectorized chunks as a percentage of total chunks.

        Returns 0.0 when ``total_chunks`` is zero, avoiding a division by zero.
        """
        if self.total_chunks == 0:
            return 0.0
        return (self.vectorized_chunks / self.total_chunks) * 100

    def get_file_tree(self) -> Dict[str, Any]:
        """Build a nested dict describing the archive's directory layout.

        Each item's ``relative_path`` is split on ``/``. Every component
        except the last becomes a ``{'type': 'directory', 'children': {...}}``
        node; the last component becomes a ``{'type': 'file', ...}`` leaf
        holding the item's id, size, content type, language, processable flag
        and chunk count.

        Returns:
            Mapping from top-level names to directory or file nodes.
        """
        tree: Dict[str, Any] = {}

        for item in self.items.all().order_by('relative_path'):
            *directories, filename = item.relative_path.split('/')
            current = tree

            for part in directories:
                node = current.setdefault(
                    part, {'type': 'directory', 'children': {}}
                )
                # A file leaf can already occupy this name (items "a" and
                # "a/b" in a malformed archive). Attach a children mapping to
                # it rather than raising KeyError on the missing key.
                current = node.setdefault('children', {})

            # Add file
            current[filename] = {
                'type': 'file',
                'id': str(item.id),
                'size': item.file_size,
                'content_type': item.content_type,
                'language': item.language,
                'is_processable': item.is_processable,
                'chunks_count': item.chunks_count
            }

        return tree
|
230
|
+
|
231
|
+
|
232
|
+
class ArchiveItem(UserScopedModel):
    """One file or document stored inside a ``DocumentArchive``.

    ``save`` fills in the content hash, MIME type, language and content
    classification when they have not been provided.
    """

    # Custom managers
    # Imported in the class body, not at module top.
    # NOTE(review): presumably to sidestep a circular import — confirm.
    from ..managers.archive import ArchiveItemManager
    objects = ArchiveItemManager()

    archive = models.ForeignKey(
        DocumentArchive,
        related_name='items',
        on_delete=models.CASCADE,
        help_text="Parent archive",
    )

    # File metadata
    relative_path = models.CharField(max_length=1024, help_text="Path within archive")
    item_name = models.CharField(max_length=255, help_text="Item name")
    item_type = models.CharField(max_length=100, help_text="MIME type")
    content_type = models.CharField(
        max_length=20,
        default=ContentType.UNKNOWN,
        choices=ContentType.choices,
        help_text="Content classification",
    )
    file_size = models.PositiveIntegerField(default=0, help_text="Item size in bytes")
    content_hash = models.CharField(max_length=64, help_text="SHA-256 hash of item content")

    # Content processing
    raw_content = models.TextField(blank=True, help_text="Extracted text content")
    is_processable = models.BooleanField(
        default=False,
        help_text="Whether item can be processed for chunks",
    )

    # Metadata for context
    language = models.CharField(
        max_length=50,
        blank=True,
        help_text="Programming language or document language",
    )
    encoding = models.CharField(max_length=50, default='utf-8', help_text="Character encoding")

    # Processing results
    chunks_count = models.PositiveIntegerField(default=0, help_text="Number of chunks created")
    total_tokens = models.PositiveIntegerField(default=0, help_text="Total tokens in all chunks")
    processing_cost = models.FloatField(default=0.0, help_text="Processing cost for this item")

    # Additional metadata
    metadata = models.JSONField(
        null=True,
        blank=True,
        default=dict,
        help_text="Item-specific metadata",
    )

    class Meta:
        db_table = 'django_cfg_knowbase_archive_items'
        verbose_name = 'Archive Item'
        verbose_name_plural = 'Archive Items'
        ordering = ['archive', 'relative_path']
        indexes = [
            models.Index(fields=['user']),
            models.Index(fields=['archive', 'relative_path']),
            models.Index(fields=['content_type', 'is_processable']),
            models.Index(fields=['language']),
        ]
        constraints = [
            # A path may appear only once within a given archive.
            models.UniqueConstraint(
                name='unique_archive_item_path',
                fields=['archive', 'relative_path'],
            ),
        ]

    def save(self, *args, **kwargs):
        """Fill in derived fields that are still empty, then persist.

        Derived, in order: ``content_hash`` (SHA-256 of ``raw_content``),
        ``item_type`` (guessed MIME type, falling back to
        ``application/octet-stream``), ``language`` and ``content_type``.
        Values that are already set are left untouched, except that a
        ``content_type`` of ``UNKNOWN`` is re-detected.
        """
        hash_missing = not self.content_hash
        if self.raw_content and hash_missing:
            digest = hashlib.sha256(self.raw_content.encode())
            self.content_hash = digest.hexdigest()

        # Detect item type and programming language
        if not self.item_type:
            guessed_mime = mimetypes.guess_type(self.item_name)[0]
            self.item_type = guessed_mime or 'application/octet-stream'

        if not self.language:
            self.language = self.detect_programming_language()

        unclassified = not self.content_type or self.content_type == ContentType.UNKNOWN
        if unclassified:
            self.content_type = self.detect_content_type()

        super().save(*args, **kwargs)

    def __str__(self) -> str:
        return f"{self.relative_path} in {self.archive.title}"

    def detect_programming_language(self) -> str:
        """Map ``item_name`` to a language label.

        Files named ``dockerfile`` or ``makefile`` (case-insensitive) return
        that lowercase name. Otherwise the lowercase file extension is looked
        up in a fixed table; unknown extensions yield an empty string.
        """
        name_path = Path(self.item_name)

        # Special cases: recognised by whole file name, not by extension.
        lowered_name = name_path.name.lower()
        if lowered_name in ('dockerfile', 'makefile'):
            return lowered_name

        by_extension = {
            '.py': 'python',
            '.js': 'javascript',
            '.ts': 'typescript',
            '.jsx': 'react',
            '.tsx': 'react-typescript',
            '.java': 'java',
            '.go': 'golang',
            '.rs': 'rust',
            '.cpp': 'cpp',
            '.c': 'c',
            '.php': 'php',
            '.rb': 'ruby',
            '.md': 'markdown',
            '.yml': 'yaml',
            '.yaml': 'yaml',
            '.json': 'json',
            '.xml': 'xml',
            '.html': 'html',
            '.css': 'css',
            '.scss': 'scss',
            '.sql': 'sql',
            '.sh': 'bash',
            '.dockerfile': 'dockerfile',
            '.tf': 'terraform',
        }
        return by_extension.get(name_path.suffix.lower(), '')

    def detect_content_type(self) -> str:
        """Classify the item by the lowercase extension of ``item_name``.

        Only the file extension is consulted. Extension groups are checked
        in the order code, document, data, image, archive; anything else is
        ``ContentType.UNKNOWN``.
        """
        suffix = Path(self.item_name).suffix.lower()

        classification = (
            # Code files
            (ContentType.CODE, {
                '.py', '.js', '.ts', '.jsx', '.tsx', '.java', '.go', '.rs',
                '.cpp', '.c', '.h', '.hpp', '.php', '.rb', '.cs', '.swift',
                '.kt', '.scala', '.clj', '.hs', '.ml', '.fs', '.elm',
            }),
            # Document files
            (ContentType.DOCUMENT, {
                '.md', '.txt', '.rst', '.adoc', '.pdf', '.docx', '.doc',
            }),
            # Data files
            (ContentType.DATA, {
                '.json', '.csv', '.xml', '.yml', '.yaml', '.toml', '.ini',
            }),
            # Image files
            (ContentType.IMAGE, {
                '.jpg', '.jpeg', '.png', '.gif', '.bmp', '.svg', '.webp',
            }),
            # Archive files
            (ContentType.ARCHIVE, {
                '.zip', '.tar', '.gz', '.bz2', '.7z', '.rar',
            }),
        )

        for kind, extensions in classification:
            if suffix in extensions:
                return kind
        return ContentType.UNKNOWN

    @property
    def file_extension(self) -> str:
        """Lowercase extension of ``item_name``, including the leading dot."""
        name_path = Path(self.item_name)
        return name_path.suffix.lower()

    @property
    def is_code_file(self) -> bool:
        """True when the item is classified as ``ContentType.CODE``."""
        return self.content_type == ContentType.CODE

    @property
    def is_document_file(self) -> bool:
        """True when the item is classified as ``ContentType.DOCUMENT``."""
        return self.content_type == ContentType.DOCUMENT
|
455
|
+
|
456
|
+
|
457
|
+
class ArchiveItemChunk(UserScopedModel):
    """One chunk of an archive item's content, with an optional embedding.

    Each chunk references both its parent ``ArchiveItem`` and the owning
    ``DocumentArchive``.
    """

    # Custom managers
    # Imported in the class body, not at module top.
    # NOTE(review): presumably to sidestep a circular import — confirm.
    from ..managers.archive import ArchiveItemChunkManager
    objects = ArchiveItemChunkManager()

    # Parent references
    archive = models.ForeignKey(
        DocumentArchive,
        related_name='chunks',
        on_delete=models.CASCADE,
        help_text="Parent archive",
    )
    item = models.ForeignKey(
        ArchiveItem,
        related_name='chunks',
        on_delete=models.CASCADE,
        help_text="Parent item",
    )

    # Chunk content
    content = models.TextField(help_text="Chunk text content")
    chunk_index = models.PositiveIntegerField(
        help_text="Sequential chunk number within item",
    )
    chunk_type = models.CharField(
        max_length=20,
        default=ChunkType.TEXT,
        choices=ChunkType.choices,
        help_text="Type of content in chunk",
    )

    # Context preservation - rich metadata for AI understanding
    context_metadata = models.JSONField(
        default=dict,
        help_text="Rich context information for AI processing",
    )

    # Vector embedding (1536 dimensions for OpenAI text-embedding-ada-002)
    embedding = VectorField(
        null=True,
        dimensions=1536,
        help_text="Vector embedding for semantic search",
    )

    # Chunk statistics
    token_count = models.PositiveIntegerField(default=0, help_text="Number of tokens in chunk")
    character_count = models.PositiveIntegerField(
        default=0,
        help_text="Number of characters in chunk",
    )

    # Processing metadata
    embedding_model = models.CharField(
        max_length=100,
        default="text-embedding-ada-002",
        help_text="Model used for embedding generation",
    )
    embedding_cost = models.FloatField(
        default=0.0,
        help_text="Cost in USD for embedding generation",
    )

    class Meta:
        db_table = 'django_cfg_knowbase_archive_item_chunks'
        verbose_name = 'Archive Item Chunk'
        verbose_name_plural = 'Archive Item Chunks'
        ordering = ['item', 'chunk_index']
        indexes = [
            models.Index(fields=['user']),
            models.Index(fields=['archive']),
            models.Index(fields=['item', 'chunk_index']),
            models.Index(fields=['chunk_type']),
        ]
        constraints = [
            # A chunk index may appear only once per item.
            models.UniqueConstraint(
                name='unique_item_chunk',
                fields=['item', 'chunk_index'],
            ),
        ]

    def save(self, *args, **kwargs):
        """Set ``character_count`` from ``content`` when it is still zero, then persist.

        An already non-zero ``character_count`` is not recomputed.
        """
        count_missing = not self.character_count
        if self.content and count_missing:
            self.character_count = len(self.content)

        super().save(*args, **kwargs)

    def __str__(self) -> str:
        return f"Chunk {self.chunk_index} of {self.item.relative_path}"

    @classmethod
    def semantic_search(
        cls,
        user,
        query_embedding: List[float],
        limit: int = 5,
        similarity_threshold: float = 0.7,
        content_types: Optional[List[str]] = None,
        languages: Optional[List[str]] = None
    ):
        """Return the user's chunks most similar to ``query_embedding``.

        Args:
            user: Owner whose chunks are searched.
            query_embedding: Vector compared against each chunk's embedding.
            limit: Maximum number of chunks returned.
            similarity_threshold: Minimum ``1 - cosine distance`` to keep.
            content_types: When truthy, restrict to items with these content types.
            languages: When truthy, restrict to items with these languages.

        Returns:
            Sliced queryset annotated with ``similarity``, best match first.
        """
        from pgvector.django import CosineDistance

        # Only chunks that already have an embedding can be compared.
        candidates = cls.objects.filter(user=user, embedding__isnull=False)

        # Apply content type filter
        if content_types:
            candidates = candidates.filter(item__content_type__in=content_types)

        # Apply language filter
        if languages:
            candidates = candidates.filter(item__language__in=languages)

        scored = candidates.annotate(
            similarity=1 - CosineDistance('embedding', query_embedding)
        )
        close_enough = scored.filter(similarity__gte=similarity_threshold)
        return close_enough.order_by('-similarity')[:limit]

    def get_context_summary(self) -> Dict[str, Any]:
        """Return a display-oriented dict describing where this chunk comes from.

        ``chunk_position`` is formatted as ``<chunk_index + 1>/<chunks_count>``,
        using the parent item's ``chunks_count``.
        """
        parent_item = self.item
        position = f"{self.chunk_index + 1}/{parent_item.chunks_count}"
        return {
            'archive_title': self.archive.title,
            'item_path': parent_item.relative_path,
            'item_type': parent_item.content_type,
            'language': parent_item.language,
            'chunk_position': position,
            'chunk_type': self.chunk_type,
        }
|
@@ -0,0 +1,58 @@
|
|
1
|
+
"""
|
2
|
+
Base models for knowledge base application.
|
3
|
+
"""
|
4
|
+
|
5
|
+
from django.db import models
|
6
|
+
from django.contrib.auth import get_user_model
|
7
|
+
from django.conf import settings
|
8
|
+
from enum import Enum
|
9
|
+
import uuid
|
10
|
+
|
11
|
+
|
12
|
+
class ProcessingStatus(models.TextChoices):
    """Processing states usable as ``processing_status`` choices.

    Every member is a ``(stored value, display label)`` pair.
    """

    PENDING = "pending", "Pending"
    PROCESSING = "processing", "Processing"

    # The remaining values: completed, failed, cancelled.
    COMPLETED = "completed", "Completed"
    FAILED = "failed", "Failed"
    CANCELLED = "cancelled", "Cancelled"
|
19
|
+
|
20
|
+
|
21
|
+
class TimestampedModel(models.Model):
    """Abstract base providing a UUID primary key and created/updated timestamps."""

    # Primary key generated with uuid4; not editable.
    id = models.UUIDField(primary_key=True, editable=False, default=uuid.uuid4)

    # created_at is set on insert; updated_at is refreshed on every save.
    created_at = models.DateTimeField(db_index=True, auto_now_add=True)
    updated_at = models.DateTimeField(auto_now=True)

    class Meta:
        abstract = True
        indexes = [models.Index(fields=['-created_at'])]

    @property
    def short_uuid(self) -> str:
        """First six characters of the string form of ``id``, for display."""
        full_text = str(self.id)
        return full_text[:6]
|
42
|
+
|
43
|
+
|
44
|
+
class UserScopedModel(TimestampedModel):
    """Abstract base that attaches every record to an owning user."""

    # Owner reference; rows are removed together with their user (CASCADE).
    user = models.ForeignKey(
        settings.AUTH_USER_MODEL,
        db_index=True,
        on_delete=models.CASCADE,
        help_text="Owner of this record",
    )

    class Meta:
        abstract = True
        indexes = [models.Index(fields=['user', '-created_at'])]
|