django-cfg 1.1.81__py3-none-any.whl → 1.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- django_cfg/__init__.py +20 -448
- django_cfg/apps/accounts/README.md +3 -3
- django_cfg/apps/accounts/admin/__init__.py +0 -2
- django_cfg/apps/accounts/admin/activity.py +2 -9
- django_cfg/apps/accounts/admin/filters.py +0 -42
- django_cfg/apps/accounts/admin/inlines.py +8 -8
- django_cfg/apps/accounts/admin/otp.py +5 -5
- django_cfg/apps/accounts/admin/registration_source.py +1 -8
- django_cfg/apps/accounts/admin/user.py +12 -20
- django_cfg/apps/accounts/managers/user_manager.py +2 -129
- django_cfg/apps/accounts/migrations/0006_remove_twilioresponse_otp_secret_and_more.py +46 -0
- django_cfg/apps/accounts/models.py +3 -123
- django_cfg/apps/accounts/serializers/otp.py +40 -44
- django_cfg/apps/accounts/serializers/profile.py +0 -2
- django_cfg/apps/accounts/services/otp_service.py +98 -186
- django_cfg/apps/accounts/signals.py +25 -15
- django_cfg/apps/accounts/utils/auth_email_service.py +84 -0
- django_cfg/apps/accounts/views/otp.py +35 -36
- django_cfg/apps/agents/README.md +129 -0
- django_cfg/apps/agents/__init__.py +68 -0
- django_cfg/apps/agents/admin/__init__.py +17 -0
- django_cfg/apps/agents/admin/execution_admin.py +460 -0
- django_cfg/apps/agents/admin/registry_admin.py +360 -0
- django_cfg/apps/agents/admin/toolsets_admin.py +482 -0
- django_cfg/apps/agents/apps.py +29 -0
- django_cfg/apps/agents/core/__init__.py +20 -0
- django_cfg/apps/agents/core/agent.py +281 -0
- django_cfg/apps/agents/core/dependencies.py +154 -0
- django_cfg/apps/agents/core/exceptions.py +66 -0
- django_cfg/apps/agents/core/models.py +106 -0
- django_cfg/apps/agents/core/orchestrator.py +391 -0
- django_cfg/apps/agents/examples/__init__.py +3 -0
- django_cfg/apps/agents/examples/simple_example.py +161 -0
- django_cfg/apps/agents/integration/__init__.py +14 -0
- django_cfg/apps/agents/integration/middleware.py +80 -0
- django_cfg/apps/agents/integration/registry.py +345 -0
- django_cfg/apps/agents/integration/signals.py +50 -0
- django_cfg/apps/agents/management/__init__.py +3 -0
- django_cfg/apps/agents/management/commands/__init__.py +3 -0
- django_cfg/apps/agents/management/commands/create_agent.py +365 -0
- django_cfg/apps/agents/management/commands/orchestrator_status.py +191 -0
- django_cfg/apps/agents/managers/__init__.py +23 -0
- django_cfg/apps/agents/managers/execution.py +236 -0
- django_cfg/apps/agents/managers/registry.py +254 -0
- django_cfg/apps/agents/managers/toolsets.py +496 -0
- django_cfg/apps/agents/migrations/0001_initial.py +286 -0
- django_cfg/apps/agents/migrations/__init__.py +5 -0
- django_cfg/apps/agents/models/__init__.py +15 -0
- django_cfg/apps/agents/models/execution.py +215 -0
- django_cfg/apps/agents/models/registry.py +220 -0
- django_cfg/apps/agents/models/toolsets.py +305 -0
- django_cfg/apps/agents/patterns/__init__.py +24 -0
- django_cfg/apps/agents/patterns/content_agents.py +234 -0
- django_cfg/apps/agents/toolsets/__init__.py +15 -0
- django_cfg/apps/agents/toolsets/cache_toolset.py +285 -0
- django_cfg/apps/agents/toolsets/django_toolset.py +220 -0
- django_cfg/apps/agents/toolsets/file_toolset.py +324 -0
- django_cfg/apps/agents/toolsets/orm_toolset.py +319 -0
- django_cfg/apps/agents/urls.py +46 -0
- django_cfg/apps/knowbase/README.md +150 -0
- django_cfg/apps/knowbase/__init__.py +27 -0
- django_cfg/apps/knowbase/admin/__init__.py +23 -0
- django_cfg/apps/knowbase/admin/archive_admin.py +857 -0
- django_cfg/apps/knowbase/admin/chat_admin.py +386 -0
- django_cfg/apps/knowbase/admin/document_admin.py +650 -0
- django_cfg/apps/knowbase/admin/external_data_admin.py +685 -0
- django_cfg/apps/knowbase/apps.py +81 -0
- django_cfg/apps/knowbase/config/README.md +176 -0
- django_cfg/apps/knowbase/config/__init__.py +51 -0
- django_cfg/apps/knowbase/config/constance_fields.py +186 -0
- django_cfg/apps/knowbase/config/constance_settings.py +200 -0
- django_cfg/apps/knowbase/config/settings.py +444 -0
- django_cfg/apps/knowbase/examples/__init__.py +3 -0
- django_cfg/apps/knowbase/examples/external_data_usage.py +191 -0
- django_cfg/apps/knowbase/management/__init__.py +0 -0
- django_cfg/apps/knowbase/management/commands/__init__.py +0 -0
- django_cfg/apps/knowbase/management/commands/knowbase_stats.py +158 -0
- django_cfg/apps/knowbase/management/commands/setup_knowbase.py +59 -0
- django_cfg/apps/knowbase/managers/__init__.py +22 -0
- django_cfg/apps/knowbase/managers/archive.py +426 -0
- django_cfg/apps/knowbase/managers/base.py +32 -0
- django_cfg/apps/knowbase/managers/chat.py +141 -0
- django_cfg/apps/knowbase/managers/document.py +203 -0
- django_cfg/apps/knowbase/managers/external_data.py +471 -0
- django_cfg/apps/knowbase/migrations/0001_initial.py +427 -0
- django_cfg/apps/knowbase/migrations/0002_archiveitem_archiveitemchunk_documentarchive_and_more.py +434 -0
- django_cfg/apps/knowbase/migrations/__init__.py +5 -0
- django_cfg/apps/knowbase/mixins/__init__.py +15 -0
- django_cfg/apps/knowbase/mixins/config.py +108 -0
- django_cfg/apps/knowbase/mixins/creator.py +81 -0
- django_cfg/apps/knowbase/mixins/examples/vehicle_model_example.py +199 -0
- django_cfg/apps/knowbase/mixins/external_data_mixin.py +813 -0
- django_cfg/apps/knowbase/mixins/service.py +362 -0
- django_cfg/apps/knowbase/models/__init__.py +41 -0
- django_cfg/apps/knowbase/models/archive.py +599 -0
- django_cfg/apps/knowbase/models/base.py +58 -0
- django_cfg/apps/knowbase/models/chat.py +157 -0
- django_cfg/apps/knowbase/models/document.py +267 -0
- django_cfg/apps/knowbase/models/external_data.py +376 -0
- django_cfg/apps/knowbase/serializers/__init__.py +68 -0
- django_cfg/apps/knowbase/serializers/archive_serializers.py +386 -0
- django_cfg/apps/knowbase/serializers/chat_serializers.py +137 -0
- django_cfg/apps/knowbase/serializers/document_serializers.py +94 -0
- django_cfg/apps/knowbase/serializers/external_data_serializers.py +256 -0
- django_cfg/apps/knowbase/serializers/public_serializers.py +74 -0
- django_cfg/apps/knowbase/services/__init__.py +40 -0
- django_cfg/apps/knowbase/services/archive/__init__.py +42 -0
- django_cfg/apps/knowbase/services/archive/archive_service.py +541 -0
- django_cfg/apps/knowbase/services/archive/chunking_service.py +791 -0
- django_cfg/apps/knowbase/services/archive/exceptions.py +52 -0
- django_cfg/apps/knowbase/services/archive/extraction_service.py +508 -0
- django_cfg/apps/knowbase/services/archive/vectorization_service.py +362 -0
- django_cfg/apps/knowbase/services/base.py +53 -0
- django_cfg/apps/knowbase/services/chat_service.py +239 -0
- django_cfg/apps/knowbase/services/document_service.py +144 -0
- django_cfg/apps/knowbase/services/embedding/__init__.py +43 -0
- django_cfg/apps/knowbase/services/embedding/async_processor.py +244 -0
- django_cfg/apps/knowbase/services/embedding/batch_processor.py +250 -0
- django_cfg/apps/knowbase/services/embedding/batch_result.py +61 -0
- django_cfg/apps/knowbase/services/embedding/models.py +229 -0
- django_cfg/apps/knowbase/services/embedding/processors.py +148 -0
- django_cfg/apps/knowbase/services/embedding/utils.py +176 -0
- django_cfg/apps/knowbase/services/prompt_builder.py +191 -0
- django_cfg/apps/knowbase/services/search_service.py +293 -0
- django_cfg/apps/knowbase/signals/__init__.py +21 -0
- django_cfg/apps/knowbase/signals/archive_signals.py +211 -0
- django_cfg/apps/knowbase/signals/chat_signals.py +37 -0
- django_cfg/apps/knowbase/signals/document_signals.py +143 -0
- django_cfg/apps/knowbase/signals/external_data_signals.py +157 -0
- django_cfg/apps/knowbase/tasks/__init__.py +39 -0
- django_cfg/apps/knowbase/tasks/archive_tasks.py +316 -0
- django_cfg/apps/knowbase/tasks/document_processing.py +341 -0
- django_cfg/apps/knowbase/tasks/external_data_tasks.py +341 -0
- django_cfg/apps/knowbase/tasks/maintenance.py +195 -0
- django_cfg/apps/knowbase/urls.py +43 -0
- django_cfg/apps/knowbase/utils/__init__.py +12 -0
- django_cfg/apps/knowbase/utils/chunk_settings.py +261 -0
- django_cfg/apps/knowbase/utils/text_processing.py +375 -0
- django_cfg/apps/knowbase/utils/validation.py +99 -0
- django_cfg/apps/knowbase/views/__init__.py +28 -0
- django_cfg/apps/knowbase/views/archive_views.py +469 -0
- django_cfg/apps/knowbase/views/base.py +49 -0
- django_cfg/apps/knowbase/views/chat_views.py +181 -0
- django_cfg/apps/knowbase/views/document_views.py +183 -0
- django_cfg/apps/knowbase/views/public_views.py +129 -0
- django_cfg/apps/leads/admin.py +70 -0
- django_cfg/apps/newsletter/admin.py +234 -0
- django_cfg/apps/newsletter/admin_filters.py +124 -0
- django_cfg/apps/support/admin.py +196 -0
- django_cfg/apps/support/admin_filters.py +71 -0
- django_cfg/apps/support/templates/support/chat/ticket_chat.html +1 -1
- django_cfg/apps/urls.py +5 -4
- django_cfg/cli/README.md +1 -1
- django_cfg/cli/commands/create_project.py +2 -2
- django_cfg/cli/commands/info.py +1 -1
- django_cfg/config.py +44 -0
- django_cfg/core/config.py +29 -82
- django_cfg/core/environment.py +1 -1
- django_cfg/core/generation.py +19 -107
- django_cfg/{integration.py → core/integration.py} +18 -16
- django_cfg/core/validation.py +1 -1
- django_cfg/management/__init__.py +1 -1
- django_cfg/management/commands/__init__.py +1 -1
- django_cfg/management/commands/auto_generate.py +482 -0
- django_cfg/management/commands/migrator.py +19 -101
- django_cfg/management/commands/test_email.py +1 -1
- django_cfg/middleware/README.md +0 -158
- django_cfg/middleware/__init__.py +0 -2
- django_cfg/middleware/user_activity.py +3 -3
- django_cfg/models/api.py +145 -0
- django_cfg/models/base.py +287 -0
- django_cfg/models/cache.py +4 -4
- django_cfg/models/constance.py +25 -88
- django_cfg/models/database.py +9 -9
- django_cfg/models/drf.py +3 -36
- django_cfg/models/email.py +163 -0
- django_cfg/models/environment.py +276 -0
- django_cfg/models/limits.py +1 -1
- django_cfg/models/logging.py +366 -0
- django_cfg/models/revolution.py +41 -2
- django_cfg/models/security.py +125 -0
- django_cfg/models/services.py +1 -1
- django_cfg/modules/__init__.py +2 -56
- django_cfg/modules/base.py +78 -52
- django_cfg/modules/django_currency/service.py +2 -2
- django_cfg/modules/django_email.py +2 -2
- django_cfg/modules/django_health.py +267 -0
- django_cfg/modules/django_llm/llm/client.py +79 -17
- django_cfg/modules/django_llm/translator/translator.py +2 -2
- django_cfg/modules/django_logger.py +2 -2
- django_cfg/modules/django_ngrok.py +2 -2
- django_cfg/modules/django_tasks.py +68 -3
- django_cfg/modules/django_telegram.py +3 -3
- django_cfg/modules/django_twilio/sendgrid_service.py +2 -2
- django_cfg/modules/django_twilio/service.py +2 -2
- django_cfg/modules/django_twilio/simple_service.py +2 -2
- django_cfg/modules/django_twilio/templates/guide.md +266 -0
- django_cfg/modules/django_twilio/twilio_service.py +2 -2
- django_cfg/modules/django_unfold/__init__.py +69 -0
- django_cfg/modules/{unfold → django_unfold}/callbacks.py +23 -22
- django_cfg/modules/django_unfold/dashboard.py +278 -0
- django_cfg/modules/django_unfold/icons/README.md +145 -0
- django_cfg/modules/django_unfold/icons/__init__.py +12 -0
- django_cfg/modules/django_unfold/icons/constants.py +2851 -0
- django_cfg/modules/django_unfold/icons/generate_icons.py +486 -0
- django_cfg/modules/django_unfold/models/__init__.py +42 -0
- django_cfg/modules/django_unfold/models/config.py +601 -0
- django_cfg/modules/django_unfold/models/dashboard.py +206 -0
- django_cfg/modules/django_unfold/models/dropdown.py +40 -0
- django_cfg/modules/django_unfold/models/navigation.py +73 -0
- django_cfg/modules/django_unfold/models/tabs.py +25 -0
- django_cfg/modules/{unfold → django_unfold}/system_monitor.py +2 -2
- django_cfg/modules/django_unfold/utils.py +140 -0
- django_cfg/registry/__init__.py +23 -0
- django_cfg/registry/core.py +61 -0
- django_cfg/registry/exceptions.py +11 -0
- django_cfg/registry/modules.py +12 -0
- django_cfg/registry/services.py +26 -0
- django_cfg/registry/third_party.py +52 -0
- django_cfg/routing/__init__.py +19 -0
- django_cfg/routing/callbacks.py +198 -0
- django_cfg/routing/routers.py +48 -0
- django_cfg/templates/admin/layouts/dashboard_with_tabs.html +8 -9
- django_cfg/templatetags/__init__.py +0 -0
- django_cfg/templatetags/django_cfg.py +33 -0
- django_cfg/urls.py +33 -0
- django_cfg/utils/path_resolution.py +1 -1
- django_cfg/utils/smart_defaults.py +7 -61
- django_cfg/utils/toolkit.py +663 -0
- {django_cfg-1.1.81.dist-info → django_cfg-1.2.0.dist-info}/METADATA +83 -86
- django_cfg-1.2.0.dist-info/RECORD +441 -0
- django_cfg/apps/tasks/@docs/README.md +0 -195
- django_cfg/archive/django_sample.zip +0 -0
- django_cfg/models/unfold.py +0 -271
- django_cfg/modules/unfold/__init__.py +0 -29
- django_cfg/modules/unfold/dashboard.py +0 -318
- django_cfg/pyproject.toml +0 -370
- django_cfg/routers.py +0 -83
- django_cfg-1.1.81.dist-info/RECORD +0 -278
- /django_cfg/{exceptions.py → core/exceptions.py} +0 -0
- /django_cfg/modules/{unfold → django_unfold}/models.py +0 -0
- /django_cfg/modules/{unfold → django_unfold}/tailwind.py +0 -0
- /django_cfg/{version_check.py → utils/version_check.py} +0 -0
- {django_cfg-1.1.81.dist-info → django_cfg-1.2.0.dist-info}/WHEEL +0 -0
- {django_cfg-1.1.81.dist-info → django_cfg-1.2.0.dist-info}/entry_points.txt +0 -0
- {django_cfg-1.1.81.dist-info → django_cfg-1.2.0.dist-info}/licenses/LICENSE +0 -0
django_cfg/apps/knowbase/migrations/0002_archiveitem_archiveitemchunk_documentarchive_and_more.py
ADDED
@@ -0,0 +1,434 @@
|
|
1
|
+
# Generated by Django 5.2.6 on 2025-09-20 16:57
|
2
|
+
|
3
|
+
import django.db.models.deletion
|
4
|
+
import pgvector.django.vector
|
5
|
+
import uuid
|
6
|
+
from django.conf import settings
|
7
|
+
from django.db import migrations, models
|
8
|
+
|
9
|
+
|
10
|
+
class Migration(migrations.Migration):
|
11
|
+
|
12
|
+
dependencies = [
|
13
|
+
('django_cfg_knowbase', '0001_initial'),
|
14
|
+
migrations.swappable_dependency(settings.AUTH_USER_MODEL),
|
15
|
+
]
|
16
|
+
|
17
|
+
operations = [
|
18
|
+
migrations.CreateModel(
|
19
|
+
name='ArchiveItem',
|
20
|
+
fields=[
|
21
|
+
('id', models.UUIDField(default=uuid.uuid4, editable=False, primary_key=True, serialize=False)),
|
22
|
+
('created_at', models.DateTimeField(auto_now_add=True, db_index=True)),
|
23
|
+
('updated_at', models.DateTimeField(auto_now=True)),
|
24
|
+
('relative_path', models.CharField(help_text='Path within archive', max_length=1024)),
|
25
|
+
('item_name', models.CharField(help_text='Item name', max_length=255)),
|
26
|
+
('item_type', models.CharField(help_text='MIME type', max_length=100)),
|
27
|
+
('content_type', models.CharField(choices=[('document', 'Document'), ('code', 'Code'), ('image', 'Image'), ('data', 'Data'), ('archive', 'Archive'), ('unknown', 'Unknown')], default='unknown', help_text='Content classification', max_length=20)),
|
28
|
+
('file_size', models.PositiveIntegerField(default=0, help_text='Item size in bytes')),
|
29
|
+
('content_hash', models.CharField(help_text='SHA-256 hash of item content', max_length=64)),
|
30
|
+
('raw_content', models.TextField(blank=True, help_text='Extracted text content')),
|
31
|
+
('is_processable', models.BooleanField(default=False, help_text='Whether item can be processed for chunks')),
|
32
|
+
('language', models.CharField(blank=True, help_text='Programming language or document language', max_length=50)),
|
33
|
+
('encoding', models.CharField(default='utf-8', help_text='Character encoding', max_length=50)),
|
34
|
+
('chunks_count', models.PositiveIntegerField(default=0, help_text='Number of chunks created')),
|
35
|
+
('total_tokens', models.PositiveIntegerField(default=0, help_text='Total tokens in all chunks')),
|
36
|
+
('processing_cost', models.FloatField(default=0.0, help_text='Processing cost for this item')),
|
37
|
+
('metadata', models.JSONField(blank=True, default=dict, help_text='Item-specific metadata', null=True)),
|
38
|
+
],
|
39
|
+
options={
|
40
|
+
'verbose_name': 'Archive Item',
|
41
|
+
'verbose_name_plural': 'Archive Items',
|
42
|
+
'db_table': 'django_cfg_knowbase_archive_items',
|
43
|
+
'ordering': ['archive', 'relative_path'],
|
44
|
+
},
|
45
|
+
),
|
46
|
+
migrations.CreateModel(
|
47
|
+
name='ArchiveItemChunk',
|
48
|
+
fields=[
|
49
|
+
('id', models.UUIDField(default=uuid.uuid4, editable=False, primary_key=True, serialize=False)),
|
50
|
+
('created_at', models.DateTimeField(auto_now_add=True, db_index=True)),
|
51
|
+
('updated_at', models.DateTimeField(auto_now=True)),
|
52
|
+
('content', models.TextField(help_text='Chunk text content')),
|
53
|
+
('chunk_index', models.PositiveIntegerField(help_text='Sequential chunk number within item')),
|
54
|
+
('chunk_type', models.CharField(choices=[('text', 'Text'), ('code', 'Code'), ('heading', 'Heading'), ('metadata', 'Metadata'), ('table', 'Table'), ('list', 'List')], default='text', help_text='Type of content in chunk', max_length=20)),
|
55
|
+
('context_metadata', models.JSONField(default=dict, help_text='Rich context information for AI processing')),
|
56
|
+
('embedding', pgvector.django.vector.VectorField(dimensions=1536, help_text='Vector embedding for semantic search', null=True)),
|
57
|
+
('token_count', models.PositiveIntegerField(default=0, help_text='Number of tokens in chunk')),
|
58
|
+
('character_count', models.PositiveIntegerField(default=0, help_text='Number of characters in chunk')),
|
59
|
+
('embedding_model', models.CharField(default='text-embedding-ada-002', help_text='Model used for embedding generation', max_length=100)),
|
60
|
+
('embedding_cost', models.FloatField(default=0.0, help_text='Cost in USD for embedding generation')),
|
61
|
+
],
|
62
|
+
options={
|
63
|
+
'verbose_name': 'Archive Item Chunk',
|
64
|
+
'verbose_name_plural': 'Archive Item Chunks',
|
65
|
+
'db_table': 'django_cfg_knowbase_archive_item_chunks',
|
66
|
+
'ordering': ['item', 'chunk_index'],
|
67
|
+
},
|
68
|
+
),
|
69
|
+
migrations.CreateModel(
|
70
|
+
name='DocumentArchive',
|
71
|
+
fields=[
|
72
|
+
('id', models.UUIDField(default=uuid.uuid4, editable=False, primary_key=True, serialize=False)),
|
73
|
+
('created_at', models.DateTimeField(auto_now_add=True, db_index=True)),
|
74
|
+
('updated_at', models.DateTimeField(auto_now=True)),
|
75
|
+
('title', models.CharField(help_text='Archive title', max_length=512)),
|
76
|
+
('description', models.TextField(blank=True, help_text='Archive description')),
|
77
|
+
('is_public', models.BooleanField(default=True, help_text='Whether this archive is publicly accessible')),
|
78
|
+
('archive_file', models.FileField(help_text='Uploaded archive file', upload_to='archives/%Y/%m/%d/')),
|
79
|
+
('original_filename', models.CharField(help_text='Original uploaded filename', max_length=255)),
|
80
|
+
('file_size', models.PositiveIntegerField(default=0, help_text='Archive size in bytes')),
|
81
|
+
('archive_type', models.CharField(choices=[('zip', 'ZIP'), ('tar', 'TAR'), ('tar.gz', 'TAR.GZ'), ('tar.bz2', 'TAR.BZ2')], help_text='Archive format', max_length=20)),
|
82
|
+
('content_hash', models.CharField(db_index=True, help_text='SHA-256 hash for duplicate detection', max_length=64)),
|
83
|
+
('processing_status', models.CharField(choices=[('pending', 'Pending'), ('processing', 'Processing'), ('completed', 'Completed'), ('failed', 'Failed'), ('cancelled', 'Cancelled')], db_index=True, default='pending', max_length=20)),
|
84
|
+
('processed_at', models.DateTimeField(blank=True, help_text='When processing completed', null=True)),
|
85
|
+
('processing_error', models.TextField(blank=True, default='', help_text='Error message if processing failed')),
|
86
|
+
('processing_duration_ms', models.PositiveIntegerField(default=0, help_text='Processing time in milliseconds')),
|
87
|
+
('total_items', models.PositiveIntegerField(default=0, help_text='Total items in archive')),
|
88
|
+
('processed_items', models.PositiveIntegerField(default=0, help_text='Successfully processed items')),
|
89
|
+
('total_chunks', models.PositiveIntegerField(default=0, help_text='Total chunks created')),
|
90
|
+
('vectorized_chunks', models.PositiveIntegerField(default=0, help_text='Chunks with embeddings')),
|
91
|
+
('total_tokens', models.PositiveIntegerField(default=0, help_text='Total tokens across all chunks')),
|
92
|
+
('total_cost_usd', models.FloatField(default=0.0, help_text='Total processing cost in USD')),
|
93
|
+
('metadata', models.JSONField(blank=True, default=dict, help_text='Additional archive metadata', null=True)),
|
94
|
+
],
|
95
|
+
options={
|
96
|
+
'verbose_name': 'Document Archive',
|
97
|
+
'verbose_name_plural': 'Document Archives',
|
98
|
+
'db_table': 'django_cfg_knowbase_document_archives',
|
99
|
+
'ordering': ['-created_at'],
|
100
|
+
},
|
101
|
+
),
|
102
|
+
migrations.CreateModel(
|
103
|
+
name='DocumentCategory',
|
104
|
+
fields=[
|
105
|
+
('id', models.UUIDField(default=uuid.uuid4, editable=False, primary_key=True, serialize=False)),
|
106
|
+
('created_at', models.DateTimeField(auto_now_add=True, db_index=True)),
|
107
|
+
('updated_at', models.DateTimeField(auto_now=True)),
|
108
|
+
('name', models.CharField(help_text='Category name', max_length=255, unique=True)),
|
109
|
+
('description', models.TextField(blank=True, help_text='Category description')),
|
110
|
+
('is_public', models.BooleanField(default=True, help_text='Whether documents in this category are publicly accessible')),
|
111
|
+
],
|
112
|
+
options={
|
113
|
+
'verbose_name': 'Document Category',
|
114
|
+
'verbose_name_plural': 'Document Categories',
|
115
|
+
'db_table': 'django_cfg_knowbase_document_categories',
|
116
|
+
'ordering': ['name'],
|
117
|
+
},
|
118
|
+
),
|
119
|
+
migrations.CreateModel(
|
120
|
+
name='ExternalData',
|
121
|
+
fields=[
|
122
|
+
('id', models.UUIDField(default=uuid.uuid4, editable=False, primary_key=True, serialize=False)),
|
123
|
+
('created_at', models.DateTimeField(auto_now_add=True, db_index=True)),
|
124
|
+
('updated_at', models.DateTimeField(auto_now=True)),
|
125
|
+
('title', models.CharField(help_text='Human-readable title for this external data source', max_length=512)),
|
126
|
+
('description', models.TextField(blank=True, help_text='Description of what this external data contains')),
|
127
|
+
('source_type', models.CharField(choices=[('model', 'Django Model'), ('api', 'API Endpoint'), ('database', 'Database Query'), ('file', 'File System'), ('custom', 'Custom Source')], default='model', help_text='Type of external data source', max_length=20)),
|
128
|
+
('source_identifier', models.CharField(blank=True, help_text="Unique identifier for the data source (e.g., 'vehicles_data.Vehicle')", max_length=255)),
|
129
|
+
('source_config', models.JSONField(blank=True, default=dict, help_text='Configuration for data extraction (fields, filters, etc.)')),
|
130
|
+
('content', models.TextField(blank=True, help_text='Extracted text content for vectorization')),
|
131
|
+
('content_hash', models.CharField(blank=True, db_index=True, help_text='SHA256 hash of content for change detection', max_length=64)),
|
132
|
+
('metadata', models.JSONField(blank=True, default=dict, help_text='Additional metadata from the source')),
|
133
|
+
('status', models.CharField(choices=[('pending', 'Pending'), ('processing', 'Processing'), ('completed', 'Completed'), ('failed', 'Failed'), ('outdated', 'Outdated')], default='pending', help_text='Current processing status', max_length=20)),
|
134
|
+
('processing_error', models.TextField(blank=True, help_text='Error message if processing failed')),
|
135
|
+
('chunk_size', models.PositiveIntegerField(default=1000, help_text='Size of text chunks for vectorization')),
|
136
|
+
('overlap_size', models.PositiveIntegerField(default=200, help_text='Overlap between chunks')),
|
137
|
+
('embedding_model', models.CharField(default='text-embedding-ada-002', help_text='Embedding model used for vectorization', max_length=100)),
|
138
|
+
('similarity_threshold', models.FloatField(default=0.5, help_text='Similarity threshold for this external data (0.0-1.0). Lower = more results, higher = more precise')),
|
139
|
+
('processed_at', models.DateTimeField(blank=True, help_text='When the data was last processed', null=True)),
|
140
|
+
('source_updated_at', models.DateTimeField(blank=True, help_text='When the source data was last updated', null=True)),
|
141
|
+
('total_chunks', models.PositiveIntegerField(default=0, help_text='Total number of chunks created')),
|
142
|
+
('total_tokens', models.PositiveIntegerField(default=0, help_text='Total tokens processed')),
|
143
|
+
('processing_cost', models.FloatField(default=0.0, help_text='Total cost for processing this data (USD)')),
|
144
|
+
('tags', models.JSONField(blank=True, default=list, help_text='Tags for categorization and filtering')),
|
145
|
+
('is_active', models.BooleanField(default=True, help_text='Whether this data source is active for search')),
|
146
|
+
('is_public', models.BooleanField(default=False, help_text='Whether this data is publicly searchable')),
|
147
|
+
],
|
148
|
+
options={
|
149
|
+
'verbose_name': 'External Data',
|
150
|
+
'verbose_name_plural': 'External Data',
|
151
|
+
'db_table': 'django_cfg_knowbase_external_data',
|
152
|
+
'ordering': ['-processed_at', '-created_at'],
|
153
|
+
},
|
154
|
+
),
|
155
|
+
migrations.CreateModel(
|
156
|
+
name='ExternalDataChunk',
|
157
|
+
fields=[
|
158
|
+
('id', models.UUIDField(default=uuid.uuid4, editable=False, primary_key=True, serialize=False)),
|
159
|
+
('created_at', models.DateTimeField(auto_now_add=True, db_index=True)),
|
160
|
+
('updated_at', models.DateTimeField(auto_now=True)),
|
161
|
+
('content', models.TextField(blank=True, help_text='Text content of the chunk')),
|
162
|
+
('chunk_index', models.PositiveIntegerField(default=0, help_text='Sequential index of this chunk within the external data')),
|
163
|
+
('embedding', pgvector.django.vector.VectorField(blank=True, dimensions=1536, help_text='Vector embedding for semantic search', null=True)),
|
164
|
+
('embedding_model', models.CharField(default='text-embedding-ada-002', help_text='Model used for embedding generation', max_length=100)),
|
165
|
+
('token_count', models.PositiveIntegerField(default=0, help_text='Number of tokens in this chunk')),
|
166
|
+
('character_count', models.PositiveIntegerField(default=0, help_text='Number of characters in this chunk')),
|
167
|
+
('embedding_cost', models.FloatField(default=0.0, help_text='Cost for generating this embedding (USD)')),
|
168
|
+
('chunk_metadata', models.JSONField(blank=True, default=dict, help_text='Additional metadata for this specific chunk')),
|
169
|
+
],
|
170
|
+
options={
|
171
|
+
'verbose_name': 'External Data Chunk',
|
172
|
+
'verbose_name_plural': 'External Data Chunks',
|
173
|
+
'db_table': 'django_cfg_knowbase_external_data_chunk',
|
174
|
+
'ordering': ['external_data', 'chunk_index'],
|
175
|
+
},
|
176
|
+
),
|
177
|
+
migrations.RenameIndex(
|
178
|
+
model_name='chatmessage',
|
179
|
+
new_name='django_cfg__session_564129_idx',
|
180
|
+
old_name='django_cfg_knowbase_ch_session_782e67_idx',
|
181
|
+
),
|
182
|
+
migrations.RenameIndex(
|
183
|
+
model_name='chatmessage',
|
184
|
+
new_name='django_cfg__role_a6e8b7_idx',
|
185
|
+
old_name='django_cfg_knowbase_ch_role_e0d53e_idx',
|
186
|
+
),
|
187
|
+
migrations.RenameIndex(
|
188
|
+
model_name='chatmessage',
|
189
|
+
new_name='django_cfg__created_875295_idx',
|
190
|
+
old_name='django_cfg_knowbase_ch_created_49bad9_idx',
|
191
|
+
),
|
192
|
+
migrations.RenameIndex(
|
193
|
+
model_name='chatsession',
|
194
|
+
new_name='django_cfg__user_id_6bf22b_idx',
|
195
|
+
old_name='django_cfg_knowbase_ch_user_id_227a9f_idx',
|
196
|
+
),
|
197
|
+
migrations.RenameIndex(
|
198
|
+
model_name='chatsession',
|
199
|
+
new_name='django_cfg__is_acti_12448e_idx',
|
200
|
+
old_name='django_cfg_knowbase_ch_is_acti_6fcdc8_idx',
|
201
|
+
),
|
202
|
+
migrations.RenameIndex(
|
203
|
+
model_name='document',
|
204
|
+
new_name='django_cfg__user_id_ea1b86_idx',
|
205
|
+
old_name='django_cfg_knowbase_do_user_id_3f12df_idx',
|
206
|
+
),
|
207
|
+
migrations.RenameIndex(
|
208
|
+
model_name='document',
|
209
|
+
new_name='django_cfg__content_ebac19_idx',
|
210
|
+
old_name='django_cfg_knowbase_do_content_0655c8_idx',
|
211
|
+
),
|
212
|
+
migrations.RenameIndex(
|
213
|
+
model_name='document',
|
214
|
+
new_name='django_cfg__process_5f6075_idx',
|
215
|
+
old_name='django_cfg_knowbase_do_process_f2c2a8_idx',
|
216
|
+
),
|
217
|
+
migrations.RenameIndex(
|
218
|
+
model_name='documentchunk',
|
219
|
+
new_name='django_cfg__user_id_ddd3b5_idx',
|
220
|
+
old_name='django_cfg_knowbase_do_user_id_a4e4de_idx',
|
221
|
+
),
|
222
|
+
migrations.RenameIndex(
|
223
|
+
model_name='documentchunk',
|
224
|
+
new_name='django_cfg__documen_394c50_idx',
|
225
|
+
old_name='django_cfg_knowbase_do_documen_64aaaa_idx',
|
226
|
+
),
|
227
|
+
migrations.AddField(
|
228
|
+
model_name='document',
|
229
|
+
name='is_public',
|
230
|
+
field=models.BooleanField(default=True, help_text='Whether this document is publicly accessible'),
|
231
|
+
),
|
232
|
+
migrations.AlterField(
|
233
|
+
model_name='chatmessage',
|
234
|
+
name='cost_usd',
|
235
|
+
field=models.FloatField(default=0.0, help_text='Cost in USD for this message'),
|
236
|
+
),
|
237
|
+
migrations.AlterField(
|
238
|
+
model_name='chatsession',
|
239
|
+
name='total_cost_usd',
|
240
|
+
field=models.FloatField(default=0.0, help_text='Total session cost for monitoring'),
|
241
|
+
),
|
242
|
+
migrations.AlterField(
|
243
|
+
model_name='document',
|
244
|
+
name='metadata',
|
245
|
+
field=models.JSONField(blank=True, default=dict, help_text='Additional document metadata', null=True),
|
246
|
+
),
|
247
|
+
migrations.AlterField(
|
248
|
+
model_name='document',
|
249
|
+
name='total_cost_usd',
|
250
|
+
field=models.FloatField(default=0.0, help_text='Total processing cost in USD'),
|
251
|
+
),
|
252
|
+
migrations.AlterField(
|
253
|
+
model_name='documentchunk',
|
254
|
+
name='embedding_cost',
|
255
|
+
field=models.FloatField(default=0.0, help_text='Cost in USD for embedding generation'),
|
256
|
+
),
|
257
|
+
migrations.AlterField(
|
258
|
+
model_name='documentchunk',
|
259
|
+
name='metadata',
|
260
|
+
field=models.JSONField(blank=True, default=dict, help_text='Chunk-specific metadata', null=True),
|
261
|
+
),
|
262
|
+
migrations.AddField(
|
263
|
+
model_name='archiveitem',
|
264
|
+
name='user',
|
265
|
+
field=models.ForeignKey(help_text='Owner of this record', on_delete=django.db.models.deletion.CASCADE, to=settings.AUTH_USER_MODEL),
|
266
|
+
),
|
267
|
+
migrations.AddField(
|
268
|
+
model_name='archiveitemchunk',
|
269
|
+
name='item',
|
270
|
+
field=models.ForeignKey(help_text='Parent item', on_delete=django.db.models.deletion.CASCADE, related_name='chunks', to='django_cfg_knowbase.archiveitem'),
|
271
|
+
),
|
272
|
+
migrations.AddField(
|
273
|
+
model_name='archiveitemchunk',
|
274
|
+
name='user',
|
275
|
+
field=models.ForeignKey(help_text='Owner of this record', on_delete=django.db.models.deletion.CASCADE, to=settings.AUTH_USER_MODEL),
|
276
|
+
),
|
277
|
+
migrations.AddField(
|
278
|
+
model_name='documentarchive',
|
279
|
+
name='user',
|
280
|
+
field=models.ForeignKey(help_text='Owner of this record', on_delete=django.db.models.deletion.CASCADE, to=settings.AUTH_USER_MODEL),
|
281
|
+
),
|
282
|
+
migrations.AddField(
|
283
|
+
model_name='archiveitemchunk',
|
284
|
+
name='archive',
|
285
|
+
field=models.ForeignKey(help_text='Parent archive', on_delete=django.db.models.deletion.CASCADE, related_name='chunks', to='django_cfg_knowbase.documentarchive'),
|
286
|
+
),
|
287
|
+
migrations.AddField(
|
288
|
+
model_name='archiveitem',
|
289
|
+
name='archive',
|
290
|
+
field=models.ForeignKey(help_text='Parent archive', on_delete=django.db.models.deletion.CASCADE, related_name='items', to='django_cfg_knowbase.documentarchive'),
|
291
|
+
),
|
292
|
+
migrations.AddField(
|
293
|
+
model_name='documentarchive',
|
294
|
+
name='categories',
|
295
|
+
field=models.ManyToManyField(blank=True, help_text='Archive categories (supports multiple)', related_name='archives', to='django_cfg_knowbase.documentcategory'),
|
296
|
+
),
|
297
|
+
migrations.AddField(
|
298
|
+
model_name='document',
|
299
|
+
name='categories',
|
300
|
+
field=models.ManyToManyField(blank=True, help_text='Document categories (supports multiple)', related_name='documents', to='django_cfg_knowbase.documentcategory'),
|
301
|
+
),
|
302
|
+
migrations.AddIndex(
|
303
|
+
model_name='document',
|
304
|
+
index=models.Index(fields=['is_public', '-created_at'], name='django_cfg__is_publ_ebcc98_idx'),
|
305
|
+
),
|
306
|
+
migrations.AddField(
|
307
|
+
model_name='externaldata',
|
308
|
+
name='category',
|
309
|
+
field=models.ForeignKey(blank=True, help_text='Category for organization', null=True, on_delete=django.db.models.deletion.SET_NULL, to='django_cfg_knowbase.documentcategory'),
|
310
|
+
),
|
311
|
+
migrations.AddField(
|
312
|
+
model_name='externaldata',
|
313
|
+
name='user',
|
314
|
+
field=models.ForeignKey(help_text='Owner of this record', on_delete=django.db.models.deletion.CASCADE, to=settings.AUTH_USER_MODEL),
|
315
|
+
),
|
316
|
+
migrations.AddField(
|
317
|
+
model_name='externaldatachunk',
|
318
|
+
name='external_data',
|
319
|
+
field=models.ForeignKey(help_text='External data this chunk belongs to', on_delete=django.db.models.deletion.CASCADE, related_name='chunks', to='django_cfg_knowbase.externaldata'),
|
320
|
+
),
|
321
|
+
migrations.AddField(
|
322
|
+
model_name='externaldatachunk',
|
323
|
+
name='user',
|
324
|
+
field=models.ForeignKey(help_text='Owner of this chunk', on_delete=django.db.models.deletion.CASCADE, to=settings.AUTH_USER_MODEL),
|
325
|
+
),
|
326
|
+
migrations.AddIndex(
|
327
|
+
model_name='archiveitemchunk',
|
328
|
+
index=models.Index(fields=['user'], name='django_cfg__user_id_a41505_idx'),
|
329
|
+
),
|
330
|
+
migrations.AddIndex(
|
331
|
+
model_name='archiveitemchunk',
|
332
|
+
index=models.Index(fields=['archive'], name='django_cfg__archive_176a8a_idx'),
|
333
|
+
),
|
334
|
+
migrations.AddIndex(
|
335
|
+
model_name='archiveitemchunk',
|
336
|
+
index=models.Index(fields=['item', 'chunk_index'], name='django_cfg__item_id_ec8c08_idx'),
|
337
|
+
),
|
338
|
+
migrations.AddIndex(
|
339
|
+
model_name='archiveitemchunk',
|
340
|
+
index=models.Index(fields=['chunk_type'], name='django_cfg__chunk_t_2a4993_idx'),
|
341
|
+
),
|
342
|
+
migrations.AddConstraint(
|
343
|
+
model_name='archiveitemchunk',
|
344
|
+
constraint=models.UniqueConstraint(fields=('item', 'chunk_index'), name='unique_item_chunk'),
|
345
|
+
),
|
346
|
+
migrations.AddIndex(
|
347
|
+
model_name='archiveitem',
|
348
|
+
index=models.Index(fields=['user'], name='django_cfg__user_id_73874f_idx'),
|
349
|
+
),
|
350
|
+
migrations.AddIndex(
|
351
|
+
model_name='archiveitem',
|
352
|
+
index=models.Index(fields=['archive', 'relative_path'], name='django_cfg__archive_42a267_idx'),
|
353
|
+
),
|
354
|
+
migrations.AddIndex(
|
355
|
+
model_name='archiveitem',
|
356
|
+
index=models.Index(fields=['content_type', 'is_processable'], name='django_cfg__content_ea7d23_idx'),
|
357
|
+
),
|
358
|
+
migrations.AddIndex(
|
359
|
+
model_name='archiveitem',
|
360
|
+
index=models.Index(fields=['language'], name='django_cfg__languag_f02bdb_idx'),
|
361
|
+
),
|
362
|
+
migrations.AddConstraint(
|
363
|
+
model_name='archiveitem',
|
364
|
+
constraint=models.UniqueConstraint(fields=('archive', 'relative_path'), name='unique_archive_item_path'),
|
365
|
+
),
|
366
|
+
migrations.AddIndex(
|
367
|
+
model_name='documentarchive',
|
368
|
+
index=models.Index(fields=['user', 'processing_status'], name='django_cfg__user_id_e4fa6e_idx'),
|
369
|
+
),
|
370
|
+
migrations.AddIndex(
|
371
|
+
model_name='documentarchive',
|
372
|
+
index=models.Index(fields=['content_hash'], name='django_cfg__content_22fdac_idx'),
|
373
|
+
),
|
374
|
+
migrations.AddIndex(
|
375
|
+
model_name='documentarchive',
|
376
|
+
index=models.Index(fields=['-processed_at'], name='django_cfg__process_c78bf9_idx'),
|
377
|
+
),
|
378
|
+
migrations.AddIndex(
|
379
|
+
model_name='documentarchive',
|
380
|
+
index=models.Index(fields=['is_public', '-created_at'], name='django_cfg__is_publ_3c6f28_idx'),
|
381
|
+
),
|
382
|
+
migrations.AddConstraint(
|
383
|
+
model_name='documentarchive',
|
384
|
+
constraint=models.UniqueConstraint(fields=('user', 'content_hash'), name='unique_user_archive'),
|
385
|
+
),
|
386
|
+
migrations.AddIndex(
|
387
|
+
model_name='externaldata',
|
388
|
+
index=models.Index(fields=['user', 'source_type'], name='django_cfg__user_id_d556c1_idx'),
|
389
|
+
),
|
390
|
+
migrations.AddIndex(
|
391
|
+
model_name='externaldata',
|
392
|
+
index=models.Index(fields=['status'], name='django_cfg__status_7b7d15_idx'),
|
393
|
+
),
|
394
|
+
migrations.AddIndex(
|
395
|
+
model_name='externaldata',
|
396
|
+
index=models.Index(fields=['is_active'], name='django_cfg__is_acti_9d3ce2_idx'),
|
397
|
+
),
|
398
|
+
migrations.AddIndex(
|
399
|
+
model_name='externaldata',
|
400
|
+
index=models.Index(fields=['processed_at'], name='django_cfg__process_fa1f31_idx'),
|
401
|
+
),
|
402
|
+
migrations.AddIndex(
|
403
|
+
model_name='externaldata',
|
404
|
+
index=models.Index(fields=['source_identifier'], name='django_cfg__source__e5ed3f_idx'),
|
405
|
+
),
|
406
|
+
migrations.AddIndex(
|
407
|
+
model_name='externaldata',
|
408
|
+
index=models.Index(fields=['content_hash'], name='django_cfg__content_f05aea_idx'),
|
409
|
+
),
|
410
|
+
migrations.AlterUniqueTogether(
|
411
|
+
name='externaldata',
|
412
|
+
unique_together={('user', 'source_identifier')},
|
413
|
+
),
|
414
|
+
migrations.AddIndex(
|
415
|
+
model_name='externaldatachunk',
|
416
|
+
index=models.Index(fields=['user', 'external_data'], name='django_cfg__user_id_7247ba_idx'),
|
417
|
+
),
|
418
|
+
migrations.AddIndex(
|
419
|
+
model_name='externaldatachunk',
|
420
|
+
index=models.Index(fields=['embedding_model'], name='django_cfg__embeddi_5947c8_idx'),
|
421
|
+
),
|
422
|
+
migrations.AddIndex(
|
423
|
+
model_name='externaldatachunk',
|
424
|
+
index=models.Index(fields=['token_count'], name='django_cfg__token_c_f8fa52_idx'),
|
425
|
+
),
|
426
|
+
migrations.AddIndex(
|
427
|
+
model_name='externaldatachunk',
|
428
|
+
index=models.Index(fields=['chunk_index'], name='django_cfg__chunk_i_8ecf40_idx'),
|
429
|
+
),
|
430
|
+
migrations.AlterUniqueTogether(
|
431
|
+
name='externaldatachunk',
|
432
|
+
unique_together={('external_data', 'chunk_index')},
|
433
|
+
),
|
434
|
+
]
|
@@ -0,0 +1,15 @@
|
|
1
|
+
"""
|
2
|
+
Mixins for knowbase integration.
|
3
|
+
"""
|
4
|
+
|
5
|
+
from .external_data_mixin import ExternalDataMixin
|
6
|
+
from .config import ExternalDataConfig
|
7
|
+
from .creator import ExternalDataCreator
|
8
|
+
from .service import ExternalDataService
|
9
|
+
|
10
|
+
# Public API of the mixins package, re-exported from the sibling modules.
__all__ = ["ExternalDataMixin", "ExternalDataConfig", "ExternalDataCreator", "ExternalDataService"]
|
@@ -0,0 +1,108 @@
|
|
1
|
+
"""
|
2
|
+
Configuration classes for ExternalDataMixin.
|
3
|
+
"""
|
4
|
+
|
5
|
+
from typing import Optional, Dict, Any, List
|
6
|
+
from pydantic import BaseModel, Field, field_validator, ConfigDict
|
7
|
+
|
8
|
+
from ..models.external_data import ExternalDataType
|
9
|
+
|
10
|
+
|
11
|
+
class ExternalDataConfig(BaseModel):
    """Validated description of one ExternalData record to be created.

    Per ``model_config``: unknown fields are rejected, string values have
    surrounding whitespace stripped, defaults are validated, and validation
    is re-run whenever an attribute is assigned.
    """

    model_config = ConfigDict(
        extra="forbid",
        str_strip_whitespace=True,
        validate_default=True,
        validate_assignment=True,
    )

    # --- Basic information ---------------------------------------------
    # Required; 3 to 512 characters.
    title: str = Field(
        ...,
        description="Human-readable title for the external data source",
        min_length=3,
        max_length=512,
    )

    # Optional free text, at most 2000 characters; defaults to "".
    description: Optional[str] = Field(
        default="",
        description="Description of what this external data contains",
        max_length=2000,
    )

    # --- Source configuration ------------------------------------------
    source_type: ExternalDataType = Field(
        default=ExternalDataType.MODEL,
        description="Type of external data source",
    )

    # Required; 1 to 255 characters.
    source_identifier: str = Field(
        ...,
        description="Unique identifier for the data source",
        min_length=1,
        max_length=255,
    )

    # --- Content ---------------------------------------------------------
    content: str = Field(
        default="",
        description="Extracted text content for vectorization",
    )

    # --- Search and processing settings ----------------------------------
    # Constrained to the closed interval [0.0, 1.0].
    similarity_threshold: float = Field(
        default=0.5,
        description="Similarity threshold for search (0.0-1.0)",
        ge=0.0,
        le=1.0,
    )

    # --- Visibility settings ---------------------------------------------
    is_active: bool = Field(
        default=True,
        description="Whether this data source is active for search",
    )

    is_public: bool = Field(
        default=False,
        description="Whether this data is publicly searchable",
    )

    # --- Additional data --------------------------------------------------
    metadata: Dict[str, Any] = Field(
        default_factory=dict,
        description="Additional metadata from the source",
    )

    source_config: Dict[str, Any] = Field(
        default_factory=dict,
        description="Configuration for data extraction",
    )

    tags: List[str] = Field(
        default_factory=list,
        description="Tags for categorization and filtering",
    )

    @field_validator('tags')
    @classmethod
    def validate_tags(cls, v):
        """Require a list of non-blank strings and return the tags stripped.

        Raises:
            ValueError: If ``v`` is not a list, or if any entry is not a
                string or is empty after stripping whitespace.
        """
        if not isinstance(v, list):
            raise ValueError('Tags must be a list of strings')

        # The isinstance check short-circuits, so .strip() is only ever
        # called on real strings.
        if any(not (isinstance(tag, str) and tag.strip()) for tag in v):
            raise ValueError('Each tag must be a non-empty string')

        return [tag.strip() for tag in v]

    @field_validator('metadata', 'source_config')
    @classmethod
    def validate_json_fields(cls, v):
        """Require the value to be a dictionary and return it unchanged.

        Only the container type is checked here; the contents are not
        inspected.

        Raises:
            ValueError: If ``v`` is not a ``dict``.
        """
        if isinstance(v, dict):
            return v
        raise ValueError('Must be a dictionary')
|
@@ -0,0 +1,81 @@
|
|
1
|
+
"""
|
2
|
+
ExternalData creator for the mixin.
|
3
|
+
"""
|
4
|
+
|
5
|
+
import logging
|
6
|
+
from typing import Optional, Dict, Any
|
7
|
+
from django.contrib.auth import get_user_model
|
8
|
+
from django.db import transaction
|
9
|
+
from django.utils import timezone
|
10
|
+
|
11
|
+
from .config import ExternalDataConfig
|
12
|
+
from ..models.external_data import ExternalData, ExternalDataStatus
|
13
|
+
|
14
|
+
logger = logging.getLogger(__name__)
|
15
|
+
|
16
|
+
|
17
|
+
class ExternalDataCreator:
    """
    Creator class for ExternalData objects with validation.

    Each instance is bound to one owner (``self.user``); every record it
    creates is assigned to that user.
    """

    def __init__(self, user=None):
        """
        Bind the creator to an owner.

        Args:
            user: Owner for created records. When ``None``, the owner is
                looked up via ``_get_default_user()`` (a database query).

        Raises:
            ValueError: If ``user`` is ``None`` and no superuser exists.
        """
        if user is None:
            self.user = self._get_default_user()
        else:
            self.user = user

    def create_from_config(self, config: ExternalDataConfig) -> Dict[str, Any]:
        """
        Create an ExternalData object from a Pydantic configuration.

        The record is always created with status ``PENDING``, no
        ``processed_at`` timestamp and an empty ``processing_error``.
        Failures are not raised to the caller; they are logged and reported
        through the returned dict.

        Args:
            config: An instance of ExternalDataConfig.

        Returns:
            dict: On success ``{'success': True, 'message': str,
            'external_data': <ExternalData>}``; on failure
            ``{'success': False, 'error': str, 'external_data': None}``.
        """
        try:
            with transaction.atomic():
                external_data = ExternalData.objects.create(
                    user=self.user,
                    title=config.title,
                    description=config.description,
                    source_type=config.source_type,
                    source_identifier=config.source_identifier,
                    content=config.content,
                    similarity_threshold=config.similarity_threshold,
                    is_active=config.is_active,
                    is_public=config.is_public,
                    metadata=config.metadata,
                    source_config=config.source_config,
                    tags=config.tags,
                    status=ExternalDataStatus.PENDING,  # Always set to pending on creation
                    processed_at=None,
                    processing_error="",
                )

            logger.info(f"Created ExternalData: {external_data.title} (ID: {external_data.id})")
            return {
                'success': True,
                'message': f"ExternalData '{external_data.title}' created successfully.",
                'external_data': external_data
            }
        except Exception as e:
            # Deliberate catch-all: callers get a result dict instead of an
            # exception. logger.exception keeps the traceback in the log so
            # the swallowed failure can still be diagnosed.
            logger.exception(f"Failed to create ExternalData from config: {e}")
            return {
                'success': False,
                'error': f"Failed to create ExternalData: {e}",
                'external_data': None
            }

    def _get_default_user(self):
        """
        Get default user for ExternalData ownership.

        Returns:
            The first superuser returned by the user model's manager.

        Raises:
            ValueError: If no superuser exists.
        """
        User = get_user_model()

        # Try to find a superuser
        superuser = User.objects.filter(is_superuser=True).first()
        if superuser:
            return superuser

        raise ValueError("No user provided and no superuser found for ExternalData ownership")
|