django-cfg 1.1.82__py3-none-any.whl → 1.2.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- django_cfg/__init__.py +20 -448
- django_cfg/apps/accounts/README.md +3 -3
- django_cfg/apps/accounts/admin/__init__.py +0 -2
- django_cfg/apps/accounts/admin/activity.py +2 -9
- django_cfg/apps/accounts/admin/filters.py +0 -42
- django_cfg/apps/accounts/admin/inlines.py +8 -8
- django_cfg/apps/accounts/admin/otp.py +5 -5
- django_cfg/apps/accounts/admin/registration_source.py +1 -8
- django_cfg/apps/accounts/admin/user.py +12 -20
- django_cfg/apps/accounts/managers/user_manager.py +2 -129
- django_cfg/apps/accounts/migrations/0006_remove_twilioresponse_otp_secret_and_more.py +46 -0
- django_cfg/apps/accounts/models.py +3 -123
- django_cfg/apps/accounts/serializers/otp.py +40 -44
- django_cfg/apps/accounts/serializers/profile.py +0 -2
- django_cfg/apps/accounts/services/otp_service.py +98 -186
- django_cfg/apps/accounts/signals.py +25 -15
- django_cfg/apps/accounts/utils/auth_email_service.py +84 -0
- django_cfg/apps/accounts/views/otp.py +35 -36
- django_cfg/apps/agents/README.md +129 -0
- django_cfg/apps/agents/__init__.py +68 -0
- django_cfg/apps/agents/admin/__init__.py +17 -0
- django_cfg/apps/agents/admin/execution_admin.py +460 -0
- django_cfg/apps/agents/admin/registry_admin.py +360 -0
- django_cfg/apps/agents/admin/toolsets_admin.py +482 -0
- django_cfg/apps/agents/apps.py +29 -0
- django_cfg/apps/agents/core/__init__.py +20 -0
- django_cfg/apps/agents/core/agent.py +281 -0
- django_cfg/apps/agents/core/dependencies.py +154 -0
- django_cfg/apps/agents/core/exceptions.py +66 -0
- django_cfg/apps/agents/core/models.py +106 -0
- django_cfg/apps/agents/core/orchestrator.py +391 -0
- django_cfg/apps/agents/examples/__init__.py +3 -0
- django_cfg/apps/agents/examples/simple_example.py +161 -0
- django_cfg/apps/agents/integration/__init__.py +14 -0
- django_cfg/apps/agents/integration/middleware.py +80 -0
- django_cfg/apps/agents/integration/registry.py +345 -0
- django_cfg/apps/agents/integration/signals.py +50 -0
- django_cfg/apps/agents/management/__init__.py +3 -0
- django_cfg/apps/agents/management/commands/__init__.py +3 -0
- django_cfg/apps/agents/management/commands/create_agent.py +365 -0
- django_cfg/apps/agents/management/commands/orchestrator_status.py +191 -0
- django_cfg/apps/agents/managers/__init__.py +23 -0
- django_cfg/apps/agents/managers/execution.py +236 -0
- django_cfg/apps/agents/managers/registry.py +254 -0
- django_cfg/apps/agents/managers/toolsets.py +496 -0
- django_cfg/apps/agents/migrations/0001_initial.py +286 -0
- django_cfg/apps/agents/migrations/__init__.py +5 -0
- django_cfg/apps/agents/models/__init__.py +15 -0
- django_cfg/apps/agents/models/execution.py +215 -0
- django_cfg/apps/agents/models/registry.py +220 -0
- django_cfg/apps/agents/models/toolsets.py +305 -0
- django_cfg/apps/agents/patterns/__init__.py +24 -0
- django_cfg/apps/agents/patterns/content_agents.py +234 -0
- django_cfg/apps/agents/toolsets/__init__.py +15 -0
- django_cfg/apps/agents/toolsets/cache_toolset.py +285 -0
- django_cfg/apps/agents/toolsets/django_toolset.py +220 -0
- django_cfg/apps/agents/toolsets/file_toolset.py +324 -0
- django_cfg/apps/agents/toolsets/orm_toolset.py +319 -0
- django_cfg/apps/agents/urls.py +46 -0
- django_cfg/apps/knowbase/README.md +150 -0
- django_cfg/apps/knowbase/__init__.py +27 -0
- django_cfg/apps/knowbase/admin/__init__.py +23 -0
- django_cfg/apps/knowbase/admin/archive_admin.py +857 -0
- django_cfg/apps/knowbase/admin/chat_admin.py +386 -0
- django_cfg/apps/knowbase/admin/document_admin.py +650 -0
- django_cfg/apps/knowbase/admin/external_data_admin.py +685 -0
- django_cfg/apps/knowbase/apps.py +81 -0
- django_cfg/apps/knowbase/config/README.md +176 -0
- django_cfg/apps/knowbase/config/__init__.py +51 -0
- django_cfg/apps/knowbase/config/constance_fields.py +186 -0
- django_cfg/apps/knowbase/config/constance_settings.py +200 -0
- django_cfg/apps/knowbase/config/settings.py +450 -0
- django_cfg/apps/knowbase/examples/__init__.py +3 -0
- django_cfg/apps/knowbase/examples/external_data_usage.py +191 -0
- django_cfg/apps/knowbase/management/__init__.py +0 -0
- django_cfg/apps/knowbase/management/commands/__init__.py +0 -0
- django_cfg/apps/knowbase/management/commands/knowbase_stats.py +158 -0
- django_cfg/apps/knowbase/management/commands/setup_knowbase.py +59 -0
- django_cfg/apps/knowbase/managers/__init__.py +22 -0
- django_cfg/apps/knowbase/managers/archive.py +426 -0
- django_cfg/apps/knowbase/managers/base.py +32 -0
- django_cfg/apps/knowbase/managers/chat.py +141 -0
- django_cfg/apps/knowbase/managers/document.py +203 -0
- django_cfg/apps/knowbase/managers/external_data.py +471 -0
- django_cfg/apps/knowbase/migrations/0001_initial.py +427 -0
- django_cfg/apps/knowbase/migrations/0002_archiveitem_archiveitemchunk_documentarchive_and_more.py +434 -0
- django_cfg/apps/knowbase/migrations/__init__.py +5 -0
- django_cfg/apps/knowbase/mixins/__init__.py +15 -0
- django_cfg/apps/knowbase/mixins/config.py +108 -0
- django_cfg/apps/knowbase/mixins/creator.py +81 -0
- django_cfg/apps/knowbase/mixins/examples/vehicle_model_example.py +199 -0
- django_cfg/apps/knowbase/mixins/external_data_mixin.py +813 -0
- django_cfg/apps/knowbase/mixins/service.py +362 -0
- django_cfg/apps/knowbase/models/__init__.py +41 -0
- django_cfg/apps/knowbase/models/archive.py +599 -0
- django_cfg/apps/knowbase/models/base.py +58 -0
- django_cfg/apps/knowbase/models/chat.py +157 -0
- django_cfg/apps/knowbase/models/document.py +267 -0
- django_cfg/apps/knowbase/models/external_data.py +376 -0
- django_cfg/apps/knowbase/serializers/__init__.py +68 -0
- django_cfg/apps/knowbase/serializers/archive_serializers.py +386 -0
- django_cfg/apps/knowbase/serializers/chat_serializers.py +137 -0
- django_cfg/apps/knowbase/serializers/document_serializers.py +94 -0
- django_cfg/apps/knowbase/serializers/external_data_serializers.py +256 -0
- django_cfg/apps/knowbase/serializers/public_serializers.py +74 -0
- django_cfg/apps/knowbase/services/__init__.py +40 -0
- django_cfg/apps/knowbase/services/archive/__init__.py +42 -0
- django_cfg/apps/knowbase/services/archive/archive_service.py +541 -0
- django_cfg/apps/knowbase/services/archive/chunking_service.py +791 -0
- django_cfg/apps/knowbase/services/archive/exceptions.py +52 -0
- django_cfg/apps/knowbase/services/archive/extraction_service.py +508 -0
- django_cfg/apps/knowbase/services/archive/vectorization_service.py +362 -0
- django_cfg/apps/knowbase/services/base.py +53 -0
- django_cfg/apps/knowbase/services/chat_service.py +239 -0
- django_cfg/apps/knowbase/services/document_service.py +144 -0
- django_cfg/apps/knowbase/services/embedding/__init__.py +43 -0
- django_cfg/apps/knowbase/services/embedding/async_processor.py +244 -0
- django_cfg/apps/knowbase/services/embedding/batch_processor.py +250 -0
- django_cfg/apps/knowbase/services/embedding/batch_result.py +61 -0
- django_cfg/apps/knowbase/services/embedding/models.py +229 -0
- django_cfg/apps/knowbase/services/embedding/processors.py +148 -0
- django_cfg/apps/knowbase/services/embedding/utils.py +176 -0
- django_cfg/apps/knowbase/services/prompt_builder.py +191 -0
- django_cfg/apps/knowbase/services/search_service.py +293 -0
- django_cfg/apps/knowbase/signals/__init__.py +21 -0
- django_cfg/apps/knowbase/signals/archive_signals.py +211 -0
- django_cfg/apps/knowbase/signals/chat_signals.py +37 -0
- django_cfg/apps/knowbase/signals/document_signals.py +143 -0
- django_cfg/apps/knowbase/signals/external_data_signals.py +157 -0
- django_cfg/apps/knowbase/tasks/__init__.py +39 -0
- django_cfg/apps/knowbase/tasks/archive_tasks.py +316 -0
- django_cfg/apps/knowbase/tasks/document_processing.py +341 -0
- django_cfg/apps/knowbase/tasks/external_data_tasks.py +341 -0
- django_cfg/apps/knowbase/tasks/maintenance.py +195 -0
- django_cfg/apps/knowbase/urls.py +43 -0
- django_cfg/apps/knowbase/utils/__init__.py +12 -0
- django_cfg/apps/knowbase/utils/chunk_settings.py +261 -0
- django_cfg/apps/knowbase/utils/text_processing.py +375 -0
- django_cfg/apps/knowbase/utils/validation.py +99 -0
- django_cfg/apps/knowbase/views/__init__.py +28 -0
- django_cfg/apps/knowbase/views/archive_views.py +469 -0
- django_cfg/apps/knowbase/views/base.py +49 -0
- django_cfg/apps/knowbase/views/chat_views.py +181 -0
- django_cfg/apps/knowbase/views/document_views.py +183 -0
- django_cfg/apps/knowbase/views/public_views.py +129 -0
- django_cfg/apps/leads/admin.py +70 -0
- django_cfg/apps/newsletter/admin.py +234 -0
- django_cfg/apps/newsletter/admin_filters.py +124 -0
- django_cfg/apps/support/admin.py +196 -0
- django_cfg/apps/support/admin_filters.py +71 -0
- django_cfg/apps/support/templates/support/chat/ticket_chat.html +1 -1
- django_cfg/apps/urls.py +5 -4
- django_cfg/cli/README.md +1 -1
- django_cfg/cli/commands/create_project.py +2 -2
- django_cfg/cli/commands/info.py +1 -1
- django_cfg/config.py +44 -0
- django_cfg/core/config.py +29 -82
- django_cfg/core/environment.py +1 -1
- django_cfg/core/generation.py +19 -107
- django_cfg/{integration.py → core/integration.py} +18 -16
- django_cfg/core/validation.py +1 -1
- django_cfg/management/__init__.py +1 -1
- django_cfg/management/commands/__init__.py +1 -1
- django_cfg/management/commands/auto_generate.py +482 -0
- django_cfg/management/commands/migrator.py +19 -101
- django_cfg/management/commands/test_email.py +1 -1
- django_cfg/middleware/README.md +0 -158
- django_cfg/middleware/__init__.py +0 -2
- django_cfg/middleware/user_activity.py +3 -3
- django_cfg/models/api.py +145 -0
- django_cfg/models/base.py +287 -0
- django_cfg/models/cache.py +4 -4
- django_cfg/models/constance.py +25 -88
- django_cfg/models/database.py +9 -9
- django_cfg/models/drf.py +3 -36
- django_cfg/models/email.py +163 -0
- django_cfg/models/environment.py +276 -0
- django_cfg/models/limits.py +1 -1
- django_cfg/models/logging.py +366 -0
- django_cfg/models/revolution.py +41 -2
- django_cfg/models/security.py +125 -0
- django_cfg/models/services.py +1 -1
- django_cfg/modules/__init__.py +2 -56
- django_cfg/modules/base.py +78 -52
- django_cfg/modules/django_currency/service.py +2 -2
- django_cfg/modules/django_email.py +2 -2
- django_cfg/modules/django_health.py +267 -0
- django_cfg/modules/django_llm/llm/client.py +91 -19
- django_cfg/modules/django_llm/translator/translator.py +2 -2
- django_cfg/modules/django_logger.py +2 -2
- django_cfg/modules/django_ngrok.py +2 -2
- django_cfg/modules/django_tasks.py +68 -3
- django_cfg/modules/django_telegram.py +3 -3
- django_cfg/modules/django_twilio/sendgrid_service.py +2 -2
- django_cfg/modules/django_twilio/service.py +2 -2
- django_cfg/modules/django_twilio/simple_service.py +2 -2
- django_cfg/modules/django_twilio/twilio_service.py +2 -2
- django_cfg/modules/django_unfold/__init__.py +69 -0
- django_cfg/modules/{unfold → django_unfold}/callbacks.py +23 -22
- django_cfg/modules/django_unfold/dashboard.py +278 -0
- django_cfg/modules/django_unfold/icons/README.md +145 -0
- django_cfg/modules/django_unfold/icons/__init__.py +12 -0
- django_cfg/modules/django_unfold/icons/constants.py +2851 -0
- django_cfg/modules/django_unfold/icons/generate_icons.py +486 -0
- django_cfg/modules/django_unfold/models/__init__.py +42 -0
- django_cfg/modules/django_unfold/models/config.py +601 -0
- django_cfg/modules/django_unfold/models/dashboard.py +206 -0
- django_cfg/modules/django_unfold/models/dropdown.py +40 -0
- django_cfg/modules/django_unfold/models/navigation.py +73 -0
- django_cfg/modules/django_unfold/models/tabs.py +25 -0
- django_cfg/modules/{unfold → django_unfold}/system_monitor.py +2 -2
- django_cfg/modules/django_unfold/utils.py +140 -0
- django_cfg/registry/__init__.py +23 -0
- django_cfg/registry/core.py +61 -0
- django_cfg/registry/exceptions.py +11 -0
- django_cfg/registry/modules.py +12 -0
- django_cfg/registry/services.py +26 -0
- django_cfg/registry/third_party.py +52 -0
- django_cfg/routing/__init__.py +19 -0
- django_cfg/routing/callbacks.py +198 -0
- django_cfg/routing/routers.py +48 -0
- django_cfg/templates/admin/layouts/dashboard_with_tabs.html +8 -9
- django_cfg/templatetags/__init__.py +0 -0
- django_cfg/templatetags/django_cfg.py +33 -0
- django_cfg/urls.py +33 -0
- django_cfg/utils/path_resolution.py +1 -1
- django_cfg/utils/smart_defaults.py +7 -61
- django_cfg/utils/toolkit.py +663 -0
- {django_cfg-1.1.82.dist-info → django_cfg-1.2.1.dist-info}/METADATA +83 -86
- django_cfg-1.2.1.dist-info/RECORD +441 -0
- django_cfg/archive/django_sample.zip +0 -0
- django_cfg/models/unfold.py +0 -271
- django_cfg/modules/unfold/__init__.py +0 -29
- django_cfg/modules/unfold/dashboard.py +0 -318
- django_cfg/pyproject.toml +0 -370
- django_cfg/routers.py +0 -83
- django_cfg-1.1.82.dist-info/RECORD +0 -278
- /django_cfg/{exceptions.py → core/exceptions.py} +0 -0
- /django_cfg/modules/{unfold → django_unfold}/models.py +0 -0
- /django_cfg/modules/{unfold → django_unfold}/tailwind.py +0 -0
- /django_cfg/{version_check.py → utils/version_check.py} +0 -0
- {django_cfg-1.1.82.dist-info → django_cfg-1.2.1.dist-info}/WHEEL +0 -0
- {django_cfg-1.1.82.dist-info → django_cfg-1.2.1.dist-info}/entry_points.txt +0 -0
- {django_cfg-1.1.82.dist-info → django_cfg-1.2.1.dist-info}/licenses/LICENSE +0 -0
@@ -0,0 +1,61 @@
|
|
1
|
+
"""
|
2
|
+
Batch processing result builder.
|
3
|
+
|
4
|
+
This module provides a clean way to build BatchProcessingResult
|
5
|
+
from individual batch operations without using raw dicts.
|
6
|
+
"""
|
7
|
+
|
8
|
+
from typing import List
|
9
|
+
from .models import BatchProcessingResult, EmbeddingResult
|
10
|
+
|
11
|
+
|
12
|
+
class BatchResultBuilder:
    """Accumulate per-chunk outcomes and assemble a BatchProcessingResult.

    Counters are updated through the ``add_*`` methods; ``build`` packs them
    into the result model so callers never hand raw dicts around.
    """

    def __init__(self, total_chunks: int):
        """Start a builder for a run covering ``total_chunks`` chunks."""
        self.total_chunks = total_chunks

        # Running tallies, all starting from zero.
        self.successful_chunks = 0
        self.failed_chunks = 0
        self.total_tokens = 0
        self.total_cost = 0.0

        # One message per recorded failure (or per failed batch).
        self.errors: List[str] = []
        # Not read by this class; initialised to 0.0.
        self.start_time: float = 0.0

    def add_successful_result(self, result: EmbeddingResult) -> None:
        """Record one embedding result.

        A result whose ``success`` flag is false is routed to
        ``add_failed_result`` instead of being counted as a success.
        """
        if not result.success:
            self.add_failed_result(result.error or "Unknown error")
            return

        self.successful_chunks += 1
        self.total_tokens += result.tokens
        self.total_cost += result.cost

    def add_failed_result(self, error: str) -> None:
        """Count one failed chunk and remember its error message."""
        self.errors.append(error)
        self.failed_chunks += 1

    def add_batch_results(self, results: List[EmbeddingResult]) -> None:
        """Record every result of a batch, in order."""
        for outcome in results:
            # add_successful_result already diverts failed results to
            # add_failed_result, so a single call covers both cases.
            self.add_successful_result(outcome)

    def add_batch_error(self, error: str, chunk_count: int) -> None:
        """Record one error that failed ``chunk_count`` chunks at once."""
        self.errors.append(error)
        self.failed_chunks += chunk_count

    def build(self, processing_time: float) -> BatchProcessingResult:
        """Create the BatchProcessingResult from the accumulated tallies."""
        summary = {
            "total_chunks": self.total_chunks,
            "successful_chunks": self.successful_chunks,
            "failed_chunks": self.failed_chunks,
            "total_tokens": self.total_tokens,
            "total_cost": self.total_cost,
            "processing_time": processing_time,
            "errors": self.errors,
        }
        return BatchProcessingResult(**summary)
|
@@ -0,0 +1,229 @@
|
|
1
|
+
"""
|
2
|
+
Data models for embedding processing.
|
3
|
+
|
4
|
+
This module defines the core data structures used throughout
|
5
|
+
the embedding processing pipeline using Pydantic for type safety.
|
6
|
+
"""
|
7
|
+
|
8
|
+
from typing import List, Dict, Any, Optional
|
9
|
+
from pydantic import BaseModel, Field, validator
|
10
|
+
from enum import Enum
|
11
|
+
|
12
|
+
|
13
|
+
class ChunkType(str, Enum):
    """Kinds of parent content a chunk can originate from.

    Members are ``str`` subclasses, so each compares equal to its raw value.
    """

    DOCUMENT = "document"
    ARCHIVE = "archive"
    EXTERNAL_DATA = "external_data"
    # Fallback for chunks whose origin is not known.
    UNKNOWN = "unknown"
|
19
|
+
|
20
|
+
|
21
|
+
class ChunkData(BaseModel):
    """One chunk of text plus the identifiers needed to process it."""

    id: str = Field(..., description="Unique chunk identifier")
    content: str = Field(..., min_length=1, description="Chunk content text")
    context_metadata: Optional[Dict[str, Any]] = Field(
        None,
        description="Additional context metadata for the chunk",
    )
    parent_id: Optional[str] = Field(
        None,
        description="ID of the parent document or archive",
    )
    parent_type: ChunkType = Field(
        ChunkType.UNKNOWN,
        description="Type of parent content",
    )

    @validator('content')
    def content_must_not_be_empty(cls, v):
        """Reject blank content; return it with surrounding whitespace removed."""
        trimmed = v.strip() if v else ""
        if not trimmed:
            raise ValueError('Content cannot be empty')
        return trimmed

    class Config:
        # Store enum fields as their raw values rather than Enum members.
        use_enum_values = True
|
46
|
+
|
47
|
+
|
48
|
+
class EmbeddingResult(BaseModel):
    """Outcome of generating an embedding for a single chunk."""

    chunk_id: str = Field(..., description="ID of the processed chunk")
    embedding: List[float] = Field(
        default_factory=list,
        description="Generated embedding vector",
    )
    tokens: int = Field(0, ge=0, description="Number of tokens used")
    cost: float = Field(0.0, ge=0.0, description="Processing cost in USD")
    success: bool = Field(
        True,
        description="Whether embedding generation was successful",
    )
    error: Optional[str] = Field(
        None,
        description="Error message if processing failed",
    )
    processing_time: Optional[float] = Field(
        None,
        ge=0.0,
        description="Time taken to process this chunk in seconds",
    )

    @validator('embedding')
    def validate_embedding_dimension(cls, v):
        """Return the embedding unchanged, whatever its dimension.

        1536 and 3072 are the common OpenAI embedding dimensions, but other
        lengths are deliberately allowed. Nothing is raised or logged for
        them, so this validator currently acts as a pass-through hook.
        """
        return v

    class Config:
        # Re-run field validation whenever an attribute is assigned.
        validate_assignment = True
|
88
|
+
|
89
|
+
|
90
|
+
class BatchProcessingResult(BaseModel):
    """Aggregate counters and derived statistics for one batch run."""

    total_chunks: int = Field(
        ..., ge=0, description="Total number of chunks processed"
    )
    successful_chunks: int = Field(
        ..., ge=0, description="Number of successfully processed chunks"
    )
    failed_chunks: int = Field(
        ..., ge=0, description="Number of failed chunks"
    )
    total_tokens: int = Field(
        0, ge=0, description="Total tokens used across all chunks"
    )
    total_cost: float = Field(
        0.0, ge=0.0, description="Total processing cost in USD"
    )
    processing_time: float = Field(
        ..., ge=0.0, description="Total processing time in seconds"
    )
    errors: List[str] = Field(
        default_factory=list, description="List of error messages"
    )

    # Derived values; each falls back to 0.0 when its denominator is zero.
    @property
    def success_rate(self) -> float:
        """Share of chunks that succeeded, as a percentage (0-100)."""
        return (
            (self.successful_chunks / self.total_chunks) * 100.0
            if self.total_chunks != 0
            else 0.0
        )

    @property
    def chunks_per_second(self) -> float:
        """Total chunks divided by the processing time."""
        return (
            self.total_chunks / self.processing_time
            if self.processing_time != 0
            else 0.0
        )

    @property
    def average_cost_per_chunk(self) -> float:
        """Total cost divided by the number of successful chunks."""
        return (
            self.total_cost / self.successful_chunks
            if self.successful_chunks != 0
            else 0.0
        )

    @property
    def average_tokens_per_chunk(self) -> float:
        """Total tokens divided by the number of successful chunks."""
        return (
            self.total_tokens / self.successful_chunks
            if self.successful_chunks != 0
            else 0.0
        )

    @validator('successful_chunks', 'failed_chunks')
    def validate_chunk_counts(cls, v, values):
        """Ensure neither count is larger than ``total_chunks``."""
        # total_chunks is absent from ``values`` when it failed validation.
        if 'total_chunks' not in values:
            return v
        limit = values['total_chunks']
        if v > limit:
            raise ValueError(f'Chunk count cannot exceed total chunks ({limit})')
        return v

    @validator('failed_chunks')
    def validate_total_consistency(cls, v, values):
        """Ensure failed + successful chunks add up to the total."""
        have_both = 'total_chunks' in values and 'successful_chunks' in values
        if not have_both:
            return v
        remainder = values['total_chunks'] - values['successful_chunks']
        if v != remainder:
            raise ValueError(
                f'Failed chunks ({v}) + successful chunks ({values["successful_chunks"]}) '
                f'must equal total chunks ({values["total_chunks"]})'
            )
        return v

    class Config:
        # Re-run field validation whenever an attribute is assigned.
        validate_assignment = True

    def model_dump_summary(self) -> Dict[str, Any]:
        """Return a compact dict of counters and formatted stats for logging."""
        summary: Dict[str, Any] = {}
        summary["total_chunks"] = self.total_chunks
        summary["successful"] = self.successful_chunks
        summary["failed"] = self.failed_chunks
        summary["success_rate"] = f"{self.success_rate:.1f}%"
        summary["total_tokens"] = self.total_tokens
        summary["total_cost"] = f"${self.total_cost:.4f}"
        summary["processing_time"] = f"{self.processing_time:.2f}s"
        summary["chunks_per_second"] = f"{self.chunks_per_second:.1f}"
        summary["avg_cost_per_chunk"] = f"${self.average_cost_per_chunk:.4f}"
        summary["error_count"] = len(self.errors)
        return summary
|
192
|
+
|
193
|
+
|
194
|
+
class ProcessingConfig(BaseModel):
    """Tunable settings for an embedding processing run."""

    # Batching
    batch_size: int = Field(
        100, ge=1, le=2048,
        description="Number of chunks to process in one batch",
    )
    embedding_model: str = Field(
        "text-embedding-ada-002",
        description="OpenAI embedding model to use",
    )

    # Retry behaviour
    max_retries: int = Field(
        3, ge=0, le=10,
        description="Maximum number of retries for failed requests",
    )
    retry_delay: float = Field(
        1.0, ge=0.0,
        description="Delay between retries in seconds",
    )

    # Pacing and timeouts
    rate_limit_delay: float = Field(
        0.5, ge=0.0,
        description="Delay between batches to respect rate limits",
    )
    timeout_seconds: int = Field(
        60, ge=1,
        description="Timeout for API requests in seconds",
    )

    class Config:
        # Re-run field validation whenever an attribute is assigned.
        validate_assignment = True
|
@@ -0,0 +1,148 @@
|
|
1
|
+
"""
|
2
|
+
Chunk processors for different content types.
|
3
|
+
|
4
|
+
This module provides specialized processors for handling
|
5
|
+
document and archive chunks with their specific requirements.
|
6
|
+
"""
|
7
|
+
|
8
|
+
import logging
|
9
|
+
from typing import Protocol
|
10
|
+
|
11
|
+
from django_cfg.apps.knowbase.models import DocumentChunk, ArchiveItemChunk, ExternalDataChunk
|
12
|
+
from .models import ChunkData, EmbeddingResult
|
13
|
+
|
14
|
+
logger = logging.getLogger(__name__)
|
15
|
+
|
16
|
+
|
17
|
+
class ChunkProcessor(Protocol):
    """Structural interface that every chunk processor provides.

    A processor turns a chunk into the text sent for embedding and
    stores the embedding result for that chunk.
    """

    def prepare_content_for_embedding(self, chunk: ChunkData) -> str:
        """Return the text to embed for ``chunk``."""
        ...

    def save_embedding_result(self, chunk_id: str, result: EmbeddingResult) -> None:
        """Store ``result`` for the chunk identified by ``chunk_id``."""
        ...
|
27
|
+
|
28
|
+
|
29
|
+
class DocumentChunkProcessor:
    """Embedding processor for ``DocumentChunk`` records."""

    def prepare_content_for_embedding(self, chunk: ChunkData) -> str:
        """Return the chunk text with surrounding whitespace removed."""
        text = chunk.content
        return text.strip()

    def save_embedding_result(self, chunk_id: str, result: EmbeddingResult) -> None:
        """Write the embedding, token count and cost onto the document chunk.

        Raises:
            DocumentChunk.DoesNotExist: no chunk has the given id.
            Exception: any other failure is logged and re-raised unchanged.
        """
        try:
            logger.debug(f"🔍 Looking for document chunk with id: {chunk_id}")
            record = DocumentChunk.objects.get(id=chunk_id)

            previous = record.embedding
            previous_len = 0 if previous is None else len(previous)
            logger.debug(f"📄 Found document chunk: {record.id}, current embedding length: {previous_len}")

            updates = {
                'embedding': result.embedding,
                'token_count': result.tokens,
                'embedding_cost': result.cost,
            }
            for field_name, value in updates.items():
                setattr(record, field_name, value)
            # Persist only the three columns that were just changed.
            record.save(update_fields=list(updates))

            logger.info(f"✅ Document chunk {chunk_id} embedding saved: {result.tokens} tokens, ${result.cost:.4f}, embedding_len={len(result.embedding)}")

        except DocumentChunk.DoesNotExist:
            logger.error(f"❌ Document chunk {chunk_id} not found")
            raise
        except Exception as e:
            logger.error(f"❌ Error saving document chunk {chunk_id}: {e}")
            raise
|
56
|
+
|
57
|
+
|
58
|
+
class ArchiveChunkProcessor:
    """Embedding processor for archive chunks.

    Content is prefixed with whatever file/function/class/language context
    the chunk carries, then capped at ``MAX_EMBEDDING_CHARS`` characters.
    """

    # Conservative character budget for text sent to the embedding model.
    MAX_EMBEDDING_CHARS = 8000
    # The context prefix is kept only if more than this many characters of
    # actual content still fit next to it.
    MIN_CONTENT_CHARS = 100
    # Appended to content that had to be cut.
    TRUNCATION_MARKER = "..."

    def prepare_content_for_embedding(self, chunk: ChunkData) -> str:
        """Return the chunk text, prefixed with context, within the size budget.

        The result is never longer than ``MAX_EMBEDDING_CHARS``; the
        truncation marker is counted against the budget and is only added
        when content was actually cut.
        """
        content = chunk.content
        context = chunk.context_metadata or {}

        # Build context prefix for better embeddings.
        labelled_keys = (
            ("File", "file_path"),
            ("Function", "function_name"),
            ("Class", "class_name"),
            ("Language", "language"),
        )
        context_parts = [
            f"{label}: {context[key]}"
            for label, key in labelled_keys
            if context.get(key)
        ]
        context_prefix = " | ".join(context_parts)
        separator = "\n\n"

        if context_prefix:
            enhanced_content = f"{context_prefix}{separator}{content}"
        else:
            enhanced_content = content

        max_length = self.MAX_EMBEDDING_CHARS
        if len(enhanced_content) > max_length:
            marker = self.TRUNCATION_MARKER
            # Space left for content once prefix, separator and marker are paid for.
            room = max_length - len(context_prefix) - len(separator) - len(marker)
            if context_prefix and room > self.MIN_CONTENT_CHARS:
                enhanced_content = f"{context_prefix}{separator}{content[:room]}{marker}"
            elif len(content) > max_length:
                # No usable prefix: keep as much raw content as fits.
                enhanced_content = content[:max_length - len(marker)] + marker
            else:
                # The prefix alone blew the budget; the content itself fits
                # untouched, so it is returned without a truncation marker.
                enhanced_content = content

        return enhanced_content.strip()

    def save_embedding_result(self, chunk_id: str, result: EmbeddingResult) -> None:
        """Store the embedding on the archive chunk and update its parent item.

        Raises:
            ArchiveItemChunk.DoesNotExist: no chunk has the given id.
            Exception: any other failure is logged and re-raised unchanged.
        """
        try:
            chunk = ArchiveItemChunk.objects.select_related('item').get(id=chunk_id)
            chunk.embedding = result.embedding
            chunk.token_count = result.tokens
            chunk.embedding_cost = result.cost
            chunk.save(update_fields=['embedding', 'token_count', 'embedding_cost'])

            # Update parent item statistics.
            # NOTE(review): this is a read-modify-write on the loaded item; if
            # sibling chunks are saved concurrently the increments could
            # overwrite each other — consider F() expressions. TODO confirm.
            item = chunk.item
            item.total_tokens += result.tokens
            item.processing_cost += result.cost
            item.save(update_fields=['total_tokens', 'processing_cost'])

            logger.debug(f"✅ Archive chunk {chunk_id} embedding saved: {result.tokens} tokens, ${result.cost:.4f}")

        except ArchiveItemChunk.DoesNotExist:
            logger.error(f"❌ Archive chunk {chunk_id} not found")
            raise
        except Exception as e:
            # Same logging as the document and external-data processors.
            logger.error(f"❌ Error saving archive chunk {chunk_id}: {e}")
            raise
|
120
|
+
|
121
|
+
|
122
|
+
class ExternalDataChunkProcessor:
    """Embedding processor for ``ExternalDataChunk`` records."""

    def prepare_content_for_embedding(self, chunk: ChunkData) -> str:
        """Return the chunk text with surrounding whitespace removed."""
        text = chunk.content
        return text.strip()

    def save_embedding_result(self, chunk_id: str, result: EmbeddingResult) -> None:
        """Write the embedding, token count and cost onto the external data chunk.

        Raises:
            ExternalDataChunk.DoesNotExist: no chunk has the given id.
            Exception: any other failure is logged and re-raised unchanged.
        """
        try:
            logger.debug(f"🔍 Looking for external data chunk with id: {chunk_id}")
            record = ExternalDataChunk.objects.get(id=chunk_id)

            previous = record.embedding
            previous_len = 0 if previous is None else len(previous)
            logger.debug(f"🔗 Found external data chunk: {record.id}, current embedding length: {previous_len}")

            updates = {
                'embedding': result.embedding,
                'token_count': result.tokens,
                'embedding_cost': result.cost,
            }
            for field_name, value in updates.items():
                setattr(record, field_name, value)
            # Persist only the three columns that were just changed.
            record.save(update_fields=list(updates))

            logger.info(f"✅ External data chunk {chunk_id} embedding saved: {result.tokens} tokens, ${result.cost:.4f}, embedding_len={len(result.embedding)}")

        except ExternalDataChunk.DoesNotExist:
            logger.error(f"❌ External data chunk {chunk_id} not found")
            raise
        except Exception as e:
            logger.error(f"❌ Error saving external data chunk {chunk_id}: {e}")
            raise
|
@@ -0,0 +1,176 @@
|
|
1
|
+
"""
|
2
|
+
Utility functions for embedding processing.
|
3
|
+
|
4
|
+
This module provides convenient wrapper functions for common
|
5
|
+
embedding processing operations.
|
6
|
+
"""
|
7
|
+
|
8
|
+
import logging
|
9
|
+
from typing import List, Union
|
10
|
+
|
11
|
+
from django_cfg.apps.knowbase.models import DocumentChunk, ArchiveItemChunk, ExternalDataChunk
|
12
|
+
|
13
|
+
from .models import ChunkData, BatchProcessingResult, ChunkType
|
14
|
+
from .batch_processor import OptimizedEmbeddingProcessor
|
15
|
+
from .async_processor import AsyncOptimizedEmbeddingProcessor
|
16
|
+
|
17
|
+
logger = logging.getLogger(__name__)
|
18
|
+
|
19
|
+
|
20
|
+
def process_document_chunks_optimized(document_chunks: List[DocumentChunk]) -> BatchProcessingResult:
    """Embed document chunks in batches via OptimizedEmbeddingProcessor.

    Chunks whose content is empty or whitespace-only are left out.
    """
    prepared = []
    for source in document_chunks:
        text = source.content
        # Skip chunks that carry no usable text.
        if not text or not text.strip():
            continue
        prepared.append(
            ChunkData(
                id=str(source.id),
                content=text,
                parent_id=str(source.document_id),
                parent_type=ChunkType.DOCUMENT,
            )
        )

    return OptimizedEmbeddingProcessor().process_chunks_batch(prepared)
|
36
|
+
|
37
|
+
|
38
|
+
def process_archive_chunks_optimized(archive_chunks: List[ArchiveItemChunk]) -> BatchProcessingResult:
    """
    Embed archive item chunks through the synchronous batch processor.

    Chunks whose content is empty or whitespace-only are left out of the
    batch; each remaining chunk carries its ``context_metadata`` along.
    The return value is whatever
    ``OptimizedEmbeddingProcessor.process_chunks_batch`` produces.
    """
    payload = []
    for archive_chunk in archive_chunks:
        text = archive_chunk.content
        # Nothing to embed for blank chunks.
        if not text or not text.strip():
            continue
        payload.append(
            ChunkData(
                id=str(archive_chunk.id),
                content=text,
                context_metadata=archive_chunk.context_metadata,
                parent_id=str(archive_chunk.item_id),
                parent_type=ChunkType.ARCHIVE,
            )
        )

    return OptimizedEmbeddingProcessor().process_chunks_batch(payload)
|
55
|
+
|
56
|
+
|
57
|
+
async def aprocess_document_chunks_optimized(document_chunks: List[DocumentChunk]) -> BatchProcessingResult:
    """
    Embed document chunks through the asynchronous batch processor.

    Chunks whose content is empty or whitespace-only are left out of the
    batch. The awaited result of
    ``AsyncOptimizedEmbeddingProcessor.aprocess_chunks_batch`` is returned.
    """
    payload = []
    for doc_chunk in document_chunks:
        text = doc_chunk.content
        # Nothing to embed for blank chunks.
        if not text or not text.strip():
            continue
        payload.append(
            ChunkData(
                id=str(doc_chunk.id),
                content=text,
                parent_id=str(doc_chunk.document_id),
                parent_type=ChunkType.DOCUMENT,
            )
        )

    async_processor = AsyncOptimizedEmbeddingProcessor()
    outcome = await async_processor.aprocess_chunks_batch(payload)
    return outcome
|
73
|
+
|
74
|
+
|
75
|
+
async def aprocess_archive_chunks_optimized(archive_chunks: List[ArchiveItemChunk]) -> BatchProcessingResult:
    """
    Embed archive item chunks through the asynchronous batch processor.

    Chunks whose content is empty or whitespace-only are left out of the
    batch; each remaining chunk carries its ``context_metadata`` along.
    The awaited result of
    ``AsyncOptimizedEmbeddingProcessor.aprocess_chunks_batch`` is returned.
    """
    payload = []
    for archive_chunk in archive_chunks:
        text = archive_chunk.content
        # Nothing to embed for blank chunks.
        if not text or not text.strip():
            continue
        payload.append(
            ChunkData(
                id=str(archive_chunk.id),
                content=text,
                context_metadata=archive_chunk.context_metadata,
                parent_id=str(archive_chunk.item_id),
                parent_type=ChunkType.ARCHIVE,
            )
        )

    async_processor = AsyncOptimizedEmbeddingProcessor()
    outcome = await async_processor.aprocess_chunks_batch(payload)
    return outcome
|
92
|
+
|
93
|
+
|
94
|
+
def process_chunks_context_aware(chunks: Union[List[DocumentChunk], List[ArchiveItemChunk]]) -> BatchProcessingResult:
    """
    Process document or archive chunks via the context-aware batch entry point.

    The chunk kind is decided from the first element only; every element is
    then converted as that kind. Chunks with empty or whitespace-only content
    are dropped before the batch is handed to
    ``AsyncOptimizedEmbeddingProcessor.process_chunks_batch_context_aware``.

    Returns:
        An all-zero ``BatchProcessingResult`` when ``chunks`` is empty,
        otherwise the processor's result.

    Raises:
        ValueError: if the first chunk is neither a ``DocumentChunk`` nor an
            ``ArchiveItemChunk``.
    """
    # Empty input: report an empty run without creating a processor.
    if not chunks:
        return BatchProcessingResult(
            total_chunks=0,
            successful_chunks=0,
            failed_chunks=0,
            total_tokens=0,
            total_cost=0.0,
            processing_time=0.0,
            errors=[],
        )

    head = chunks[0]
    treat_as_document = isinstance(head, DocumentChunk)
    if not treat_as_document and not isinstance(head, ArchiveItemChunk):
        raise ValueError(f"Unsupported chunk type: {type(head)}")

    payload = []
    for item in chunks:
        text = item.content
        # Nothing to embed for blank chunks.
        if not text or not text.strip():
            continue
        if treat_as_document:
            entry = ChunkData(
                id=str(item.id),
                content=text,
                parent_id=str(item.document_id),
                parent_type=ChunkType.DOCUMENT,
            )
        else:
            entry = ChunkData(
                id=str(item.id),
                content=text,
                context_metadata=item.context_metadata,
                parent_id=str(item.item_id),
                parent_type=ChunkType.ARCHIVE,
            )
        payload.append(entry)

    return AsyncOptimizedEmbeddingProcessor().process_chunks_batch_context_aware(payload)
|
141
|
+
|
142
|
+
|
143
|
+
def process_external_data_chunks_optimized(external_data_chunks: List[ExternalDataChunk]) -> BatchProcessingResult:
    """
    Process external data chunks with optimized batch operations.

    Every chunk in ``external_data_chunks`` is converted to a ``ChunkData``
    (no content filtering is applied here) and the list is handed to
    ``OptimizedEmbeddingProcessor.process_chunks_batch``, whose result is
    returned.
    """
    chunk_data = [
        ChunkData(
            id=str(chunk.id),
            content=chunk.content,
            context_metadata=chunk.chunk_metadata,
            # Read the stored foreign-key value instead of chunk.external_data.id:
            # traversing the relation loads the parent row for every chunk whose
            # relation is not already cached (N+1 queries).
            # NOTE(review): assumes `external_data` is a ForeignKey/OneToOneField,
            # so `external_data_id` exists - confirm against the model.
            parent_id=str(chunk.external_data_id),
            parent_type=ChunkType.EXTERNAL_DATA,
        )
        for chunk in external_data_chunks
    ]

    processor = OptimizedEmbeddingProcessor()
    return processor.process_chunks_batch(chunk_data)
|
159
|
+
|
160
|
+
|
161
|
+
async def aprocess_external_data_chunks_optimized(external_data_chunks: List[ExternalDataChunk]) -> BatchProcessingResult:
    """
    Async version of external data chunk processing.

    Every chunk in ``external_data_chunks`` is converted to a ``ChunkData``
    (no content filtering is applied here) and the list is awaited through
    ``AsyncOptimizedEmbeddingProcessor.aprocess_chunks_batch``, whose result
    is returned.
    """
    chunk_data = [
        ChunkData(
            id=str(chunk.id),
            content=chunk.content,
            context_metadata=chunk.chunk_metadata,
            # Read the stored foreign-key value instead of chunk.external_data.id:
            # traversing an uncached relation issues a synchronous ORM query,
            # which Django refuses inside a running event loop
            # (SynchronousOnlyOperation), and costs one query per chunk.
            # NOTE(review): assumes `external_data` is a ForeignKey/OneToOneField,
            # so `external_data_id` exists - confirm against the model.
            parent_id=str(chunk.external_data_id),
            parent_type=ChunkType.EXTERNAL_DATA,
        )
        for chunk in external_data_chunks
    ]

    processor = AsyncOptimizedEmbeddingProcessor()
    return await processor.aprocess_chunks_batch(chunk_data)
|