django-cfg 1.1.81__py3-none-any.whl → 1.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (246) hide show
  1. django_cfg/__init__.py +20 -448
  2. django_cfg/apps/accounts/README.md +3 -3
  3. django_cfg/apps/accounts/admin/__init__.py +0 -2
  4. django_cfg/apps/accounts/admin/activity.py +2 -9
  5. django_cfg/apps/accounts/admin/filters.py +0 -42
  6. django_cfg/apps/accounts/admin/inlines.py +8 -8
  7. django_cfg/apps/accounts/admin/otp.py +5 -5
  8. django_cfg/apps/accounts/admin/registration_source.py +1 -8
  9. django_cfg/apps/accounts/admin/user.py +12 -20
  10. django_cfg/apps/accounts/managers/user_manager.py +2 -129
  11. django_cfg/apps/accounts/migrations/0006_remove_twilioresponse_otp_secret_and_more.py +46 -0
  12. django_cfg/apps/accounts/models.py +3 -123
  13. django_cfg/apps/accounts/serializers/otp.py +40 -44
  14. django_cfg/apps/accounts/serializers/profile.py +0 -2
  15. django_cfg/apps/accounts/services/otp_service.py +98 -186
  16. django_cfg/apps/accounts/signals.py +25 -15
  17. django_cfg/apps/accounts/utils/auth_email_service.py +84 -0
  18. django_cfg/apps/accounts/views/otp.py +35 -36
  19. django_cfg/apps/agents/README.md +129 -0
  20. django_cfg/apps/agents/__init__.py +68 -0
  21. django_cfg/apps/agents/admin/__init__.py +17 -0
  22. django_cfg/apps/agents/admin/execution_admin.py +460 -0
  23. django_cfg/apps/agents/admin/registry_admin.py +360 -0
  24. django_cfg/apps/agents/admin/toolsets_admin.py +482 -0
  25. django_cfg/apps/agents/apps.py +29 -0
  26. django_cfg/apps/agents/core/__init__.py +20 -0
  27. django_cfg/apps/agents/core/agent.py +281 -0
  28. django_cfg/apps/agents/core/dependencies.py +154 -0
  29. django_cfg/apps/agents/core/exceptions.py +66 -0
  30. django_cfg/apps/agents/core/models.py +106 -0
  31. django_cfg/apps/agents/core/orchestrator.py +391 -0
  32. django_cfg/apps/agents/examples/__init__.py +3 -0
  33. django_cfg/apps/agents/examples/simple_example.py +161 -0
  34. django_cfg/apps/agents/integration/__init__.py +14 -0
  35. django_cfg/apps/agents/integration/middleware.py +80 -0
  36. django_cfg/apps/agents/integration/registry.py +345 -0
  37. django_cfg/apps/agents/integration/signals.py +50 -0
  38. django_cfg/apps/agents/management/__init__.py +3 -0
  39. django_cfg/apps/agents/management/commands/__init__.py +3 -0
  40. django_cfg/apps/agents/management/commands/create_agent.py +365 -0
  41. django_cfg/apps/agents/management/commands/orchestrator_status.py +191 -0
  42. django_cfg/apps/agents/managers/__init__.py +23 -0
  43. django_cfg/apps/agents/managers/execution.py +236 -0
  44. django_cfg/apps/agents/managers/registry.py +254 -0
  45. django_cfg/apps/agents/managers/toolsets.py +496 -0
  46. django_cfg/apps/agents/migrations/0001_initial.py +286 -0
  47. django_cfg/apps/agents/migrations/__init__.py +5 -0
  48. django_cfg/apps/agents/models/__init__.py +15 -0
  49. django_cfg/apps/agents/models/execution.py +215 -0
  50. django_cfg/apps/agents/models/registry.py +220 -0
  51. django_cfg/apps/agents/models/toolsets.py +305 -0
  52. django_cfg/apps/agents/patterns/__init__.py +24 -0
  53. django_cfg/apps/agents/patterns/content_agents.py +234 -0
  54. django_cfg/apps/agents/toolsets/__init__.py +15 -0
  55. django_cfg/apps/agents/toolsets/cache_toolset.py +285 -0
  56. django_cfg/apps/agents/toolsets/django_toolset.py +220 -0
  57. django_cfg/apps/agents/toolsets/file_toolset.py +324 -0
  58. django_cfg/apps/agents/toolsets/orm_toolset.py +319 -0
  59. django_cfg/apps/agents/urls.py +46 -0
  60. django_cfg/apps/knowbase/README.md +150 -0
  61. django_cfg/apps/knowbase/__init__.py +27 -0
  62. django_cfg/apps/knowbase/admin/__init__.py +23 -0
  63. django_cfg/apps/knowbase/admin/archive_admin.py +857 -0
  64. django_cfg/apps/knowbase/admin/chat_admin.py +386 -0
  65. django_cfg/apps/knowbase/admin/document_admin.py +650 -0
  66. django_cfg/apps/knowbase/admin/external_data_admin.py +685 -0
  67. django_cfg/apps/knowbase/apps.py +81 -0
  68. django_cfg/apps/knowbase/config/README.md +176 -0
  69. django_cfg/apps/knowbase/config/__init__.py +51 -0
  70. django_cfg/apps/knowbase/config/constance_fields.py +186 -0
  71. django_cfg/apps/knowbase/config/constance_settings.py +200 -0
  72. django_cfg/apps/knowbase/config/settings.py +444 -0
  73. django_cfg/apps/knowbase/examples/__init__.py +3 -0
  74. django_cfg/apps/knowbase/examples/external_data_usage.py +191 -0
  75. django_cfg/apps/knowbase/management/__init__.py +0 -0
  76. django_cfg/apps/knowbase/management/commands/__init__.py +0 -0
  77. django_cfg/apps/knowbase/management/commands/knowbase_stats.py +158 -0
  78. django_cfg/apps/knowbase/management/commands/setup_knowbase.py +59 -0
  79. django_cfg/apps/knowbase/managers/__init__.py +22 -0
  80. django_cfg/apps/knowbase/managers/archive.py +426 -0
  81. django_cfg/apps/knowbase/managers/base.py +32 -0
  82. django_cfg/apps/knowbase/managers/chat.py +141 -0
  83. django_cfg/apps/knowbase/managers/document.py +203 -0
  84. django_cfg/apps/knowbase/managers/external_data.py +471 -0
  85. django_cfg/apps/knowbase/migrations/0001_initial.py +427 -0
  86. django_cfg/apps/knowbase/migrations/0002_archiveitem_archiveitemchunk_documentarchive_and_more.py +434 -0
  87. django_cfg/apps/knowbase/migrations/__init__.py +5 -0
  88. django_cfg/apps/knowbase/mixins/__init__.py +15 -0
  89. django_cfg/apps/knowbase/mixins/config.py +108 -0
  90. django_cfg/apps/knowbase/mixins/creator.py +81 -0
  91. django_cfg/apps/knowbase/mixins/examples/vehicle_model_example.py +199 -0
  92. django_cfg/apps/knowbase/mixins/external_data_mixin.py +813 -0
  93. django_cfg/apps/knowbase/mixins/service.py +362 -0
  94. django_cfg/apps/knowbase/models/__init__.py +41 -0
  95. django_cfg/apps/knowbase/models/archive.py +599 -0
  96. django_cfg/apps/knowbase/models/base.py +58 -0
  97. django_cfg/apps/knowbase/models/chat.py +157 -0
  98. django_cfg/apps/knowbase/models/document.py +267 -0
  99. django_cfg/apps/knowbase/models/external_data.py +376 -0
  100. django_cfg/apps/knowbase/serializers/__init__.py +68 -0
  101. django_cfg/apps/knowbase/serializers/archive_serializers.py +386 -0
  102. django_cfg/apps/knowbase/serializers/chat_serializers.py +137 -0
  103. django_cfg/apps/knowbase/serializers/document_serializers.py +94 -0
  104. django_cfg/apps/knowbase/serializers/external_data_serializers.py +256 -0
  105. django_cfg/apps/knowbase/serializers/public_serializers.py +74 -0
  106. django_cfg/apps/knowbase/services/__init__.py +40 -0
  107. django_cfg/apps/knowbase/services/archive/__init__.py +42 -0
  108. django_cfg/apps/knowbase/services/archive/archive_service.py +541 -0
  109. django_cfg/apps/knowbase/services/archive/chunking_service.py +791 -0
  110. django_cfg/apps/knowbase/services/archive/exceptions.py +52 -0
  111. django_cfg/apps/knowbase/services/archive/extraction_service.py +508 -0
  112. django_cfg/apps/knowbase/services/archive/vectorization_service.py +362 -0
  113. django_cfg/apps/knowbase/services/base.py +53 -0
  114. django_cfg/apps/knowbase/services/chat_service.py +239 -0
  115. django_cfg/apps/knowbase/services/document_service.py +144 -0
  116. django_cfg/apps/knowbase/services/embedding/__init__.py +43 -0
  117. django_cfg/apps/knowbase/services/embedding/async_processor.py +244 -0
  118. django_cfg/apps/knowbase/services/embedding/batch_processor.py +250 -0
  119. django_cfg/apps/knowbase/services/embedding/batch_result.py +61 -0
  120. django_cfg/apps/knowbase/services/embedding/models.py +229 -0
  121. django_cfg/apps/knowbase/services/embedding/processors.py +148 -0
  122. django_cfg/apps/knowbase/services/embedding/utils.py +176 -0
  123. django_cfg/apps/knowbase/services/prompt_builder.py +191 -0
  124. django_cfg/apps/knowbase/services/search_service.py +293 -0
  125. django_cfg/apps/knowbase/signals/__init__.py +21 -0
  126. django_cfg/apps/knowbase/signals/archive_signals.py +211 -0
  127. django_cfg/apps/knowbase/signals/chat_signals.py +37 -0
  128. django_cfg/apps/knowbase/signals/document_signals.py +143 -0
  129. django_cfg/apps/knowbase/signals/external_data_signals.py +157 -0
  130. django_cfg/apps/knowbase/tasks/__init__.py +39 -0
  131. django_cfg/apps/knowbase/tasks/archive_tasks.py +316 -0
  132. django_cfg/apps/knowbase/tasks/document_processing.py +341 -0
  133. django_cfg/apps/knowbase/tasks/external_data_tasks.py +341 -0
  134. django_cfg/apps/knowbase/tasks/maintenance.py +195 -0
  135. django_cfg/apps/knowbase/urls.py +43 -0
  136. django_cfg/apps/knowbase/utils/__init__.py +12 -0
  137. django_cfg/apps/knowbase/utils/chunk_settings.py +261 -0
  138. django_cfg/apps/knowbase/utils/text_processing.py +375 -0
  139. django_cfg/apps/knowbase/utils/validation.py +99 -0
  140. django_cfg/apps/knowbase/views/__init__.py +28 -0
  141. django_cfg/apps/knowbase/views/archive_views.py +469 -0
  142. django_cfg/apps/knowbase/views/base.py +49 -0
  143. django_cfg/apps/knowbase/views/chat_views.py +181 -0
  144. django_cfg/apps/knowbase/views/document_views.py +183 -0
  145. django_cfg/apps/knowbase/views/public_views.py +129 -0
  146. django_cfg/apps/leads/admin.py +70 -0
  147. django_cfg/apps/newsletter/admin.py +234 -0
  148. django_cfg/apps/newsletter/admin_filters.py +124 -0
  149. django_cfg/apps/support/admin.py +196 -0
  150. django_cfg/apps/support/admin_filters.py +71 -0
  151. django_cfg/apps/support/templates/support/chat/ticket_chat.html +1 -1
  152. django_cfg/apps/urls.py +5 -4
  153. django_cfg/cli/README.md +1 -1
  154. django_cfg/cli/commands/create_project.py +2 -2
  155. django_cfg/cli/commands/info.py +1 -1
  156. django_cfg/config.py +44 -0
  157. django_cfg/core/config.py +29 -82
  158. django_cfg/core/environment.py +1 -1
  159. django_cfg/core/generation.py +19 -107
  160. django_cfg/{integration.py → core/integration.py} +18 -16
  161. django_cfg/core/validation.py +1 -1
  162. django_cfg/management/__init__.py +1 -1
  163. django_cfg/management/commands/__init__.py +1 -1
  164. django_cfg/management/commands/auto_generate.py +482 -0
  165. django_cfg/management/commands/migrator.py +19 -101
  166. django_cfg/management/commands/test_email.py +1 -1
  167. django_cfg/middleware/README.md +0 -158
  168. django_cfg/middleware/__init__.py +0 -2
  169. django_cfg/middleware/user_activity.py +3 -3
  170. django_cfg/models/api.py +145 -0
  171. django_cfg/models/base.py +287 -0
  172. django_cfg/models/cache.py +4 -4
  173. django_cfg/models/constance.py +25 -88
  174. django_cfg/models/database.py +9 -9
  175. django_cfg/models/drf.py +3 -36
  176. django_cfg/models/email.py +163 -0
  177. django_cfg/models/environment.py +276 -0
  178. django_cfg/models/limits.py +1 -1
  179. django_cfg/models/logging.py +366 -0
  180. django_cfg/models/revolution.py +41 -2
  181. django_cfg/models/security.py +125 -0
  182. django_cfg/models/services.py +1 -1
  183. django_cfg/modules/__init__.py +2 -56
  184. django_cfg/modules/base.py +78 -52
  185. django_cfg/modules/django_currency/service.py +2 -2
  186. django_cfg/modules/django_email.py +2 -2
  187. django_cfg/modules/django_health.py +267 -0
  188. django_cfg/modules/django_llm/llm/client.py +79 -17
  189. django_cfg/modules/django_llm/translator/translator.py +2 -2
  190. django_cfg/modules/django_logger.py +2 -2
  191. django_cfg/modules/django_ngrok.py +2 -2
  192. django_cfg/modules/django_tasks.py +68 -3
  193. django_cfg/modules/django_telegram.py +3 -3
  194. django_cfg/modules/django_twilio/sendgrid_service.py +2 -2
  195. django_cfg/modules/django_twilio/service.py +2 -2
  196. django_cfg/modules/django_twilio/simple_service.py +2 -2
  197. django_cfg/modules/django_twilio/templates/guide.md +266 -0
  198. django_cfg/modules/django_twilio/twilio_service.py +2 -2
  199. django_cfg/modules/django_unfold/__init__.py +69 -0
  200. django_cfg/modules/{unfold → django_unfold}/callbacks.py +23 -22
  201. django_cfg/modules/django_unfold/dashboard.py +278 -0
  202. django_cfg/modules/django_unfold/icons/README.md +145 -0
  203. django_cfg/modules/django_unfold/icons/__init__.py +12 -0
  204. django_cfg/modules/django_unfold/icons/constants.py +2851 -0
  205. django_cfg/modules/django_unfold/icons/generate_icons.py +486 -0
  206. django_cfg/modules/django_unfold/models/__init__.py +42 -0
  207. django_cfg/modules/django_unfold/models/config.py +601 -0
  208. django_cfg/modules/django_unfold/models/dashboard.py +206 -0
  209. django_cfg/modules/django_unfold/models/dropdown.py +40 -0
  210. django_cfg/modules/django_unfold/models/navigation.py +73 -0
  211. django_cfg/modules/django_unfold/models/tabs.py +25 -0
  212. django_cfg/modules/{unfold → django_unfold}/system_monitor.py +2 -2
  213. django_cfg/modules/django_unfold/utils.py +140 -0
  214. django_cfg/registry/__init__.py +23 -0
  215. django_cfg/registry/core.py +61 -0
  216. django_cfg/registry/exceptions.py +11 -0
  217. django_cfg/registry/modules.py +12 -0
  218. django_cfg/registry/services.py +26 -0
  219. django_cfg/registry/third_party.py +52 -0
  220. django_cfg/routing/__init__.py +19 -0
  221. django_cfg/routing/callbacks.py +198 -0
  222. django_cfg/routing/routers.py +48 -0
  223. django_cfg/templates/admin/layouts/dashboard_with_tabs.html +8 -9
  224. django_cfg/templatetags/__init__.py +0 -0
  225. django_cfg/templatetags/django_cfg.py +33 -0
  226. django_cfg/urls.py +33 -0
  227. django_cfg/utils/path_resolution.py +1 -1
  228. django_cfg/utils/smart_defaults.py +7 -61
  229. django_cfg/utils/toolkit.py +663 -0
  230. {django_cfg-1.1.81.dist-info → django_cfg-1.2.0.dist-info}/METADATA +83 -86
  231. django_cfg-1.2.0.dist-info/RECORD +441 -0
  232. django_cfg/apps/tasks/@docs/README.md +0 -195
  233. django_cfg/archive/django_sample.zip +0 -0
  234. django_cfg/models/unfold.py +0 -271
  235. django_cfg/modules/unfold/__init__.py +0 -29
  236. django_cfg/modules/unfold/dashboard.py +0 -318
  237. django_cfg/pyproject.toml +0 -370
  238. django_cfg/routers.py +0 -83
  239. django_cfg-1.1.81.dist-info/RECORD +0 -278
  240. /django_cfg/{exceptions.py → core/exceptions.py} +0 -0
  241. /django_cfg/modules/{unfold → django_unfold}/models.py +0 -0
  242. /django_cfg/modules/{unfold → django_unfold}/tailwind.py +0 -0
  243. /django_cfg/{version_check.py → utils/version_check.py} +0 -0
  244. {django_cfg-1.1.81.dist-info → django_cfg-1.2.0.dist-info}/WHEEL +0 -0
  245. {django_cfg-1.1.81.dist-info → django_cfg-1.2.0.dist-info}/entry_points.txt +0 -0
  246. {django_cfg-1.1.81.dist-info → django_cfg-1.2.0.dist-info}/licenses/LICENSE +0 -0
@@ -0,0 +1,61 @@
1
+ """
2
+ Batch processing result builder.
3
+
4
+ This module provides a clean way to build BatchProcessingResult
5
+ from individual batch operations without using raw dicts.
6
+ """
7
+
8
+ from typing import List
9
+ from .models import BatchProcessingResult, EmbeddingResult
10
+
11
+
12
class BatchResultBuilder:
    """Accumulate per-chunk outcomes and turn them into a BatchProcessingResult.

    Counters start at zero; callers feed in individual results, whole batches,
    or batch-wide errors, then call :meth:`build` once processing is finished.
    """

    def __init__(self, total_chunks: int):
        # Number of chunks the caller intends to process.
        self.total_chunks = total_chunks
        # Running tallies updated by the add_* methods.
        self.successful_chunks = 0
        self.failed_chunks = 0
        self.total_tokens = 0
        self.total_cost = 0.0
        self.errors: List[str] = []
        # Not touched by this class; initialised to 0.0 for callers to set.
        self.start_time: float = 0.0

    def add_successful_result(self, result: EmbeddingResult) -> None:
        """Record one embedding result.

        A result whose ``success`` flag is false is recorded as a failure,
        using its ``error`` text or "Unknown error" when none is set.
        """
        if not result.success:
            self.add_failed_result(result.error or "Unknown error")
            return

        self.successful_chunks += 1
        self.total_tokens += result.tokens
        self.total_cost += result.cost

    def add_failed_result(self, error: str) -> None:
        """Record a single failed chunk together with its error message."""
        self.errors.append(error)
        self.failed_chunks += 1

    def add_batch_results(self, results: List[EmbeddingResult]) -> None:
        """Record every result of a batch, in order."""
        for item in results:
            # add_successful_result already routes failures to add_failed_result.
            self.add_successful_result(item)

    def add_batch_error(self, error: str, chunk_count: int) -> None:
        """Record one error that made ``chunk_count`` chunks fail at once."""
        self.errors.append(error)
        self.failed_chunks += chunk_count

    def build(self, processing_time: float) -> BatchProcessingResult:
        """Create the final BatchProcessingResult from the accumulated tallies.

        Args:
            processing_time: Total processing time to store on the result.
        """
        summary = {
            "total_chunks": self.total_chunks,
            "successful_chunks": self.successful_chunks,
            "failed_chunks": self.failed_chunks,
            "total_tokens": self.total_tokens,
            "total_cost": self.total_cost,
            "processing_time": processing_time,
            "errors": self.errors,
        }
        return BatchProcessingResult(**summary)
@@ -0,0 +1,229 @@
1
+ """
2
+ Data models for embedding processing.
3
+
4
+ This module defines the core data structures used throughout
5
+ the embedding processing pipeline using Pydantic for type safety.
6
+ """
7
+
8
+ from typing import List, Dict, Any, Optional
9
+ from pydantic import BaseModel, Field, validator
10
+ from enum import Enum
11
+
12
+
13
class ChunkType(str, Enum):
    """Kinds of parent content a chunk can belong to.

    Members are also ``str`` instances, so they compare equal to their
    plain string values.
    """

    # Chunk belongs to a document.
    DOCUMENT = "document"
    # Chunk belongs to an archive.
    ARCHIVE = "archive"
    # Chunk belongs to external data.
    EXTERNAL_DATA = "external_data"
    # Parent type is not known.
    UNKNOWN = "unknown"
19
+
20
+
21
class ChunkData(BaseModel):
    """Unified description of one chunk handed to the embedding pipeline."""

    id: str = Field(..., description="Unique chunk identifier")
    content: str = Field(..., min_length=1, description="Chunk content text")
    context_metadata: Optional[Dict[str, Any]] = Field(
        default=None, description="Additional context metadata for the chunk"
    )
    parent_id: Optional[str] = Field(
        default=None, description="ID of the parent document or archive"
    )
    parent_type: ChunkType = Field(
        default=ChunkType.UNKNOWN, description="Type of parent content"
    )

    @validator('content')
    def content_must_not_be_empty(cls, v):
        """Reject blank content and return the value with outer whitespace removed."""
        stripped = v.strip() if v else ""
        if not stripped:
            raise ValueError('Content cannot be empty')
        return stripped

    class Config:
        use_enum_values = True
46
+
47
+
48
class EmbeddingResult(BaseModel):
    """Outcome of generating an embedding for a single chunk."""

    chunk_id: str = Field(..., description="ID of the processed chunk")
    embedding: List[float] = Field(
        default_factory=list, description="Generated embedding vector"
    )
    tokens: int = Field(default=0, ge=0, description="Number of tokens used")
    cost: float = Field(default=0.0, ge=0.0, description="Processing cost in USD")
    success: bool = Field(
        default=True, description="Whether embedding generation was successful"
    )
    error: Optional[str] = Field(
        default=None, description="Error message if processing failed"
    )
    processing_time: Optional[float] = Field(
        default=None,
        ge=0.0,
        description="Time taken to process this chunk in seconds",
    )

    @validator('embedding')
    def validate_embedding_dimension(cls, v):
        """Accept the embedding unchanged, whatever its length.

        Lengths other than the common OpenAI dimensions (1536, 3072) are
        deliberately allowed; no warning or error is produced for them.
        """
        return v

    class Config:
        validate_assignment = True
88
+
89
+
90
class BatchProcessingResult(BaseModel):
    """Aggregate outcome of processing a batch of chunks.

    Validation enforces that neither count exceeds ``total_chunks`` and that
    ``failed_chunks`` equals ``total_chunks - successful_chunks``.
    """

    total_chunks: int = Field(
        ..., ge=0, description="Total number of chunks processed"
    )
    successful_chunks: int = Field(
        ..., ge=0, description="Number of successfully processed chunks"
    )
    failed_chunks: int = Field(..., ge=0, description="Number of failed chunks")
    total_tokens: int = Field(
        default=0, ge=0, description="Total tokens used across all chunks"
    )
    total_cost: float = Field(
        default=0.0, ge=0.0, description="Total processing cost in USD"
    )
    processing_time: float = Field(
        ..., ge=0.0, description="Total processing time in seconds"
    )
    errors: List[str] = Field(
        default_factory=list, description="List of error messages"
    )

    # Derived metrics (read-only, computed on access)
    @property
    def success_rate(self) -> float:
        """Share of successful chunks as a percentage; 0.0 when there are no chunks."""
        total = self.total_chunks
        return (self.successful_chunks / total) * 100.0 if total else 0.0

    @property
    def chunks_per_second(self) -> float:
        """Total chunks divided by processing time; 0.0 when the time is zero."""
        elapsed = self.processing_time
        return self.total_chunks / elapsed if elapsed else 0.0

    @property
    def average_cost_per_chunk(self) -> float:
        """Total cost divided by successful chunks; 0.0 when none succeeded."""
        succeeded = self.successful_chunks
        return self.total_cost / succeeded if succeeded else 0.0

    @property
    def average_tokens_per_chunk(self) -> float:
        """Total tokens divided by successful chunks; 0.0 when none succeeded."""
        succeeded = self.successful_chunks
        return self.total_tokens / succeeded if succeeded else 0.0

    @validator('successful_chunks', 'failed_chunks')
    def validate_chunk_counts(cls, v, values):
        """Reject a count larger than ``total_chunks`` when that field is available."""
        if 'total_chunks' in values and v > values['total_chunks']:
            total = values['total_chunks']
            raise ValueError(f'Chunk count cannot exceed total chunks ({total})')
        return v

    @validator('failed_chunks')
    def validate_total_consistency(cls, v, values):
        """Require failed + successful == total when both other fields are available."""
        if 'total_chunks' not in values or 'successful_chunks' not in values:
            return v
        if v != values['total_chunks'] - values['successful_chunks']:
            raise ValueError(
                f'Failed chunks ({v}) + successful chunks ({values["successful_chunks"]}) '
                f'must equal total chunks ({values["total_chunks"]})'
            )
        return v

    class Config:
        validate_assignment = True

    def model_dump_summary(self) -> Dict[str, Any]:
        """Return a compact dict of counts and formatted metrics for logging."""
        summary: Dict[str, Any] = {}
        summary["total_chunks"] = self.total_chunks
        summary["successful"] = self.successful_chunks
        summary["failed"] = self.failed_chunks
        summary["success_rate"] = f"{self.success_rate:.1f}%"
        summary["total_tokens"] = self.total_tokens
        summary["total_cost"] = f"${self.total_cost:.4f}"
        summary["processing_time"] = f"{self.processing_time:.2f}s"
        summary["chunks_per_second"] = f"{self.chunks_per_second:.1f}"
        summary["avg_cost_per_chunk"] = f"${self.average_cost_per_chunk:.4f}"
        summary["error_count"] = len(self.errors)
        return summary
192
+
193
+
194
class ProcessingConfig(BaseModel):
    """Tunable settings for embedding processing.

    Every field has a default and is bounded by the constraints declared on
    it; assignments after construction are validated as well.
    """

    batch_size: int = Field(
        default=100,
        ge=1,
        le=2048,
        description="Number of chunks to process in one batch",
    )
    embedding_model: str = Field(
        default="text-embedding-ada-002",
        description="OpenAI embedding model to use",
    )
    max_retries: int = Field(
        default=3,
        ge=0,
        le=10,
        description="Maximum number of retries for failed requests",
    )
    retry_delay: float = Field(
        default=1.0, ge=0.0, description="Delay between retries in seconds"
    )
    rate_limit_delay: float = Field(
        default=0.5,
        ge=0.0,
        description="Delay between batches to respect rate limits",
    )
    timeout_seconds: int = Field(
        default=60, ge=1, description="Timeout for API requests in seconds"
    )

    class Config:
        validate_assignment = True
@@ -0,0 +1,148 @@
1
+ """
2
+ Chunk processors for different content types.
3
+
4
+ This module provides specialized processors for handling
5
+ document and archive chunks with their specific requirements.
6
+ """
7
+
8
+ import logging
9
+ from typing import Protocol
10
+
11
+ from django_cfg.apps.knowbase.models import DocumentChunk, ArchiveItemChunk, ExternalDataChunk
12
+ from .models import ChunkData, EmbeddingResult
13
+
14
+ logger = logging.getLogger(__name__)
15
+
16
+
17
class ChunkProcessor(Protocol):
    """Structural interface shared by the chunk processors in this module."""

    def prepare_content_for_embedding(self, chunk: ChunkData) -> str:
        """Return the text that should be sent to the embedding model for ``chunk``."""
        ...

    def save_embedding_result(self, chunk_id: str, result: EmbeddingResult) -> None:
        """Persist ``result`` for the chunk identified by ``chunk_id``."""
        ...
27
+
28
+
29
class DocumentChunkProcessor:
    """Chunk processor for ``DocumentChunk`` rows."""

    def prepare_content_for_embedding(self, chunk: ChunkData) -> str:
        """Return the chunk's content with surrounding whitespace removed."""
        text = chunk.content
        return text.strip()

    def save_embedding_result(self, chunk_id: str, result: EmbeddingResult) -> None:
        """Store the embedding, token count and cost on the document chunk.

        Raises:
            DocumentChunk.DoesNotExist: No chunk has ``chunk_id`` (logged, then re-raised).
            Exception: Any other failure is logged and re-raised unchanged.
        """
        # Field name -> new value; the key order is reused for update_fields.
        updates = {
            'embedding': result.embedding,
            'token_count': result.tokens,
            'embedding_cost': result.cost,
        }
        try:
            logger.debug(f"🔍 Looking for document chunk with id: {chunk_id}")
            chunk = DocumentChunk.objects.get(id=chunk_id)
            logger.debug(f"📄 Found document chunk: {chunk.id}, current embedding length: {len(chunk.embedding) if chunk.embedding is not None and len(chunk.embedding) > 0 else 0}")

            for field_name, value in updates.items():
                setattr(chunk, field_name, value)
            chunk.save(update_fields=list(updates))

            logger.info(f"✅ Document chunk {chunk_id} embedding saved: {result.tokens} tokens, ${result.cost:.4f}, embedding_len={len(result.embedding)}")
        except DocumentChunk.DoesNotExist:
            logger.error(f"❌ Document chunk {chunk_id} not found")
            raise
        except Exception as e:
            logger.error(f"❌ Error saving document chunk {chunk_id}: {e}")
            raise
56
+
57
+
58
class ArchiveChunkProcessor:
    """Processor for archive chunks.

    Content is prefixed with a short context line (file, function, class,
    language) built from the chunk's metadata, and the combined text is kept
    within ``MAX_CONTENT_LENGTH`` characters.
    """

    # Conservative character budget for the text sent to the embedding model.
    MAX_CONTENT_LENGTH = 8000
    # Marker appended to truncated content; it counts against the budget.
    TRUNCATION_SUFFIX = "..."
    # The context prefix is only kept when more than this many characters
    # of real content still fit next to it.
    MIN_CONTENT_WITH_CONTEXT = 100
    # (metadata key, label) pairs, in the order they appear in the prefix.
    _CONTEXT_LABELS = (
        ('file_path', 'File'),
        ('function_name', 'Function'),
        ('class_name', 'Class'),
        ('language', 'Language'),
    )

    def prepare_content_for_embedding(self, chunk: ChunkData) -> str:
        """Prepare archive chunk content for embedding with context.

        Args:
            chunk: Chunk whose ``content`` and optional ``context_metadata``
                are combined into the text to embed.

        Returns:
            The stripped text, never longer than ``MAX_CONTENT_LENGTH``
            characters (truncation suffix included).
        """
        content = chunk.content
        context = chunk.context_metadata or {}

        # Build context prefix for better embeddings; falsy values are skipped.
        context_prefix = " | ".join(
            f"{label}: {context[key]}"
            for key, label in self._CONTEXT_LABELS
            if context.get(key)
        )
        separator = "\n\n"

        if context_prefix:
            enhanced_content = f"{context_prefix}{separator}{content}"
        else:
            enhanced_content = content

        max_length = self.MAX_CONTENT_LENGTH
        if len(enhanced_content) > max_length:
            suffix = self.TRUNCATION_SUFFIX
            # Reserve room for the suffix so the final text stays within the limit.
            budget = max_length - len(suffix)
            content_budget = budget - len(context_prefix) - len(separator)

            if context_prefix and content_budget > self.MIN_CONTENT_WITH_CONTEXT:
                enhanced_content = (
                    f"{context_prefix}{separator}{content[:content_budget]}{suffix}"
                )
            else:
                # No prefix, or the prefix would leave too little room for
                # content: embed the truncated content alone.
                enhanced_content = content[:budget] + suffix

        return enhanced_content.strip()

    def save_embedding_result(self, chunk_id: str, result: EmbeddingResult) -> None:
        """Save embedding result for archive chunk and update its parent item.

        The chunk's embedding, token count and cost are stored first; the
        parent item's ``total_tokens`` and ``processing_cost`` are then
        increased by the same amounts and saved separately.

        Raises:
            ArchiveItemChunk.DoesNotExist: No chunk has ``chunk_id`` (logged, then re-raised).
            Exception: Any other failure is logged and re-raised unchanged.
        """
        try:
            chunk = ArchiveItemChunk.objects.select_related('item').get(id=chunk_id)
            chunk.embedding = result.embedding
            chunk.token_count = result.tokens
            chunk.embedding_cost = result.cost
            chunk.save(update_fields=['embedding', 'token_count', 'embedding_cost'])

            # Update parent item statistics.
            # NOTE(review): read-modify-write on the in-memory item; concurrent
            # saves for chunks of the same item could lose updates — confirm
            # whether callers serialize per item.
            item = chunk.item
            item.total_tokens += result.tokens
            item.processing_cost += result.cost
            item.save(update_fields=['total_tokens', 'processing_cost'])

            logger.debug(f"✅ Archive chunk {chunk_id} embedding saved: {result.tokens} tokens, ${result.cost:.4f}")

        except ArchiveItemChunk.DoesNotExist:
            logger.error(f"❌ Archive chunk {chunk_id} not found")
            raise
        except Exception as e:
            # Mirrors the document / external-data processors: log, then re-raise.
            logger.error(f"❌ Error saving archive chunk {chunk_id}: {e}")
            raise
120
+
121
+
122
class ExternalDataChunkProcessor:
    """Chunk processor for ``ExternalDataChunk`` rows."""

    def prepare_content_for_embedding(self, chunk: ChunkData) -> str:
        """Return the chunk's content with surrounding whitespace removed."""
        text = chunk.content
        return text.strip()

    def save_embedding_result(self, chunk_id: str, result: EmbeddingResult) -> None:
        """Store the embedding, token count and cost on the external data chunk.

        Raises:
            ExternalDataChunk.DoesNotExist: No chunk has ``chunk_id`` (logged, then re-raised).
            Exception: Any other failure is logged and re-raised unchanged.
        """
        # Field name -> new value; the key order is reused for update_fields.
        updates = {
            'embedding': result.embedding,
            'token_count': result.tokens,
            'embedding_cost': result.cost,
        }
        try:
            logger.debug(f"🔍 Looking for external data chunk with id: {chunk_id}")
            chunk = ExternalDataChunk.objects.get(id=chunk_id)
            logger.debug(f"🔗 Found external data chunk: {chunk.id}, current embedding length: {len(chunk.embedding) if chunk.embedding is not None and len(chunk.embedding) > 0 else 0}")

            for field_name, value in updates.items():
                setattr(chunk, field_name, value)
            chunk.save(update_fields=list(updates))

            logger.info(f"✅ External data chunk {chunk_id} embedding saved: {result.tokens} tokens, ${result.cost:.4f}, embedding_len={len(result.embedding)}")
        except ExternalDataChunk.DoesNotExist:
            logger.error(f"❌ External data chunk {chunk_id} not found")
            raise
        except Exception as e:
            logger.error(f"❌ Error saving external data chunk {chunk_id}: {e}")
            raise
@@ -0,0 +1,176 @@
1
+ """
2
+ Utility functions for embedding processing.
3
+
4
+ This module provides convenient wrapper functions for common
5
+ embedding processing operations.
6
+ """
7
+
8
+ import logging
9
+ from typing import List, Union
10
+
11
+ from django_cfg.apps.knowbase.models import DocumentChunk, ArchiveItemChunk, ExternalDataChunk
12
+
13
+ from .models import ChunkData, BatchProcessingResult, ChunkType
14
+ from .batch_processor import OptimizedEmbeddingProcessor
15
+ from .async_processor import AsyncOptimizedEmbeddingProcessor
16
+
17
+ logger = logging.getLogger(__name__)
18
+
19
+
20
def process_document_chunks_optimized(document_chunks: List[DocumentChunk]) -> BatchProcessingResult:
    """Convert document chunks to ChunkData and process them in one batch run.

    Chunks whose content is empty or whitespace-only are skipped.
    """
    payload: List[ChunkData] = []
    for doc_chunk in document_chunks:
        if not doc_chunk.content or not doc_chunk.content.strip():
            continue
        payload.append(
            ChunkData(
                id=str(doc_chunk.id),
                content=doc_chunk.content,
                parent_id=str(doc_chunk.document_id),
                parent_type=ChunkType.DOCUMENT,
            )
        )

    return OptimizedEmbeddingProcessor().process_chunks_batch(payload)
36
+
37
+
38
def process_archive_chunks_optimized(archive_chunks: List[ArchiveItemChunk]) -> BatchProcessingResult:
    """Convert archive chunks to ChunkData and process them in one batch run.

    Each chunk's ``context_metadata`` is carried along; chunks whose content
    is empty or whitespace-only are skipped.
    """
    payload: List[ChunkData] = []
    for archive_chunk in archive_chunks:
        if not archive_chunk.content or not archive_chunk.content.strip():
            continue
        payload.append(
            ChunkData(
                id=str(archive_chunk.id),
                content=archive_chunk.content,
                context_metadata=archive_chunk.context_metadata,
                parent_id=str(archive_chunk.item_id),
                parent_type=ChunkType.ARCHIVE,
            )
        )

    return OptimizedEmbeddingProcessor().process_chunks_batch(payload)
55
+
56
+
57
async def aprocess_document_chunks_optimized(document_chunks: List[DocumentChunk]) -> BatchProcessingResult:
    """Asynchronously process document chunks in one batch run.

    Chunks whose content is empty or whitespace-only are skipped.
    """
    payload: List[ChunkData] = []
    for doc_chunk in document_chunks:
        if not doc_chunk.content or not doc_chunk.content.strip():
            continue
        payload.append(
            ChunkData(
                id=str(doc_chunk.id),
                content=doc_chunk.content,
                parent_id=str(doc_chunk.document_id),
                parent_type=ChunkType.DOCUMENT,
            )
        )

    async_processor = AsyncOptimizedEmbeddingProcessor()
    batch_result = await async_processor.aprocess_chunks_batch(payload)
    return batch_result
73
+
74
+
75
async def aprocess_archive_chunks_optimized(archive_chunks: List[ArchiveItemChunk]) -> BatchProcessingResult:
    """Asynchronously process archive chunks in one batch run.

    Each chunk's ``context_metadata`` is carried along; chunks whose content
    is empty or whitespace-only are skipped.
    """
    payload: List[ChunkData] = []
    for archive_chunk in archive_chunks:
        if not archive_chunk.content or not archive_chunk.content.strip():
            continue
        payload.append(
            ChunkData(
                id=str(archive_chunk.id),
                content=archive_chunk.content,
                context_metadata=archive_chunk.context_metadata,
                parent_id=str(archive_chunk.item_id),
                parent_type=ChunkType.ARCHIVE,
            )
        )

    async_processor = AsyncOptimizedEmbeddingProcessor()
    batch_result = await async_processor.aprocess_chunks_batch(payload)
    return batch_result
92
+
93
+
94
def process_chunks_context_aware(chunks: Union[List[DocumentChunk], List[ArchiveItemChunk]]) -> BatchProcessingResult:
    """
    Process document or archive chunks through the context-aware batch entry point.

    The chunk kind is decided from the first element only. Chunks whose
    content is empty or whitespace-only are skipped. An empty input yields an
    all-zero result without creating a processor.

    Raises:
        ValueError: The first chunk is neither a DocumentChunk nor an ArchiveItemChunk.
    """
    # Nothing to do: report an empty, zeroed result.
    if not chunks:
        return BatchProcessingResult(
            total_chunks=0,
            successful_chunks=0,
            failed_chunks=0,
            total_tokens=0,
            total_cost=0.0,
            processing_time=0.0,
            errors=[]
        )

    sample = chunks[0]
    is_document = isinstance(sample, DocumentChunk)
    if not is_document and not isinstance(sample, ArchiveItemChunk):
        raise ValueError(f"Unsupported chunk type: {type(first_chunk := sample)}")

    payload: List[ChunkData] = []
    for entry in chunks:
        if not entry.content or not entry.content.strip():
            continue
        if is_document:
            payload.append(
                ChunkData(
                    id=str(entry.id),
                    content=entry.content,
                    parent_id=str(entry.document_id),
                    parent_type=ChunkType.DOCUMENT,
                )
            )
        else:
            payload.append(
                ChunkData(
                    id=str(entry.id),
                    content=entry.content,
                    context_metadata=entry.context_metadata,
                    parent_id=str(entry.item_id),
                    parent_type=ChunkType.ARCHIVE,
                )
            )

    # NOTE(review): sync/async detection presumably happens inside
    # process_chunks_batch_context_aware — confirm against the processor.
    return AsyncOptimizedEmbeddingProcessor().process_chunks_batch_context_aware(payload)
141
+
142
+
143
def process_external_data_chunks_optimized(external_data_chunks: List[ExternalDataChunk]) -> BatchProcessingResult:
    """Process external data chunks with optimized batch operations.

    Each chunk is converted to ChunkData, carrying its ``chunk_metadata`` as
    context. Chunks whose content is empty or whitespace-only are skipped,
    matching the document and archive helpers; ChunkData rejects blank
    content, so a single blank chunk would otherwise abort the whole batch.

    Args:
        external_data_chunks: Chunks to embed.

    Returns:
        The BatchProcessingResult produced by the batch processor.
    """
    chunk_data = [
        ChunkData(
            id=str(chunk.id),
            content=chunk.content,
            context_metadata=chunk.chunk_metadata,
            # NOTE(review): follows the relation to read the id; if
            # external_data is a ForeignKey, external_data_id would avoid a
            # per-chunk query — confirm against the model.
            parent_id=str(chunk.external_data.id),
            parent_type=ChunkType.EXTERNAL_DATA
        )
        for chunk in external_data_chunks
        if chunk.content and chunk.content.strip()
    ]

    processor = OptimizedEmbeddingProcessor()
    return processor.process_chunks_batch(chunk_data)
159
+
160
+
161
async def aprocess_external_data_chunks_optimized(external_data_chunks: List[ExternalDataChunk]) -> BatchProcessingResult:
    """Async version of external data chunk processing.

    Each chunk is converted to ChunkData, carrying its ``chunk_metadata`` as
    context. Chunks whose content is empty or whitespace-only are skipped,
    matching the document and archive helpers; ChunkData rejects blank
    content, so a single blank chunk would otherwise abort the whole batch.

    Args:
        external_data_chunks: Chunks to embed.

    Returns:
        The BatchProcessingResult produced by the async batch processor.
    """
    chunk_data = [
        ChunkData(
            id=str(chunk.id),
            content=chunk.content,
            context_metadata=chunk.chunk_metadata,
            # NOTE(review): follows the relation to read the id; if the related
            # object is not already loaded this may hit the database from
            # async code — confirm callers prefetch it or use external_data_id.
            parent_id=str(chunk.external_data.id),
            parent_type=ChunkType.EXTERNAL_DATA
        )
        for chunk in external_data_chunks
        if chunk.content and chunk.content.strip()
    ]

    processor = AsyncOptimizedEmbeddingProcessor()
    return await processor.aprocess_chunks_batch(chunk_data)