createsonline 0.1.26__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- createsonline/__init__.py +46 -0
- createsonline/admin/__init__.py +7 -0
- createsonline/admin/content.py +526 -0
- createsonline/admin/crud.py +805 -0
- createsonline/admin/field_builder.py +559 -0
- createsonline/admin/integration.py +482 -0
- createsonline/admin/interface.py +2562 -0
- createsonline/admin/model_creator.py +513 -0
- createsonline/admin/model_manager.py +388 -0
- createsonline/admin/modern_dashboard.py +498 -0
- createsonline/admin/permissions.py +264 -0
- createsonline/admin/user_forms.py +594 -0
- createsonline/ai/__init__.py +202 -0
- createsonline/ai/fields.py +1226 -0
- createsonline/ai/orm.py +325 -0
- createsonline/ai/services.py +1244 -0
- createsonline/app.py +506 -0
- createsonline/auth/__init__.py +8 -0
- createsonline/auth/management.py +228 -0
- createsonline/auth/models.py +552 -0
- createsonline/cli/__init__.py +5 -0
- createsonline/cli/commands/__init__.py +122 -0
- createsonline/cli/commands/database.py +416 -0
- createsonline/cli/commands/info.py +173 -0
- createsonline/cli/commands/initdb.py +218 -0
- createsonline/cli/commands/project.py +545 -0
- createsonline/cli/commands/serve.py +173 -0
- createsonline/cli/commands/shell.py +93 -0
- createsonline/cli/commands/users.py +148 -0
- createsonline/cli/main.py +2041 -0
- createsonline/cli/manage.py +274 -0
- createsonline/config/__init__.py +9 -0
- createsonline/config/app.py +2577 -0
- createsonline/config/database.py +179 -0
- createsonline/config/docs.py +384 -0
- createsonline/config/errors.py +160 -0
- createsonline/config/orm.py +43 -0
- createsonline/config/request.py +93 -0
- createsonline/config/settings.py +176 -0
- createsonline/data/__init__.py +23 -0
- createsonline/data/dataframe.py +925 -0
- createsonline/data/io.py +453 -0
- createsonline/data/series.py +557 -0
- createsonline/database/__init__.py +60 -0
- createsonline/database/abstraction.py +440 -0
- createsonline/database/assistant.py +585 -0
- createsonline/database/fields.py +442 -0
- createsonline/database/migrations.py +132 -0
- createsonline/database/models.py +604 -0
- createsonline/database.py +438 -0
- createsonline/http/__init__.py +28 -0
- createsonline/http/client.py +535 -0
- createsonline/ml/__init__.py +55 -0
- createsonline/ml/classification.py +552 -0
- createsonline/ml/clustering.py +680 -0
- createsonline/ml/metrics.py +542 -0
- createsonline/ml/neural.py +560 -0
- createsonline/ml/preprocessing.py +784 -0
- createsonline/ml/regression.py +501 -0
- createsonline/performance/__init__.py +19 -0
- createsonline/performance/cache.py +444 -0
- createsonline/performance/compression.py +335 -0
- createsonline/performance/core.py +419 -0
- createsonline/project_init.py +789 -0
- createsonline/routing.py +528 -0
- createsonline/security/__init__.py +34 -0
- createsonline/security/core.py +811 -0
- createsonline/security/encryption.py +349 -0
- createsonline/server.py +295 -0
- createsonline/static/css/admin.css +263 -0
- createsonline/static/css/common.css +358 -0
- createsonline/static/css/dashboard.css +89 -0
- createsonline/static/favicon.ico +0 -0
- createsonline/static/icons/icon-128x128.png +0 -0
- createsonline/static/icons/icon-128x128.webp +0 -0
- createsonline/static/icons/icon-16x16.png +0 -0
- createsonline/static/icons/icon-16x16.webp +0 -0
- createsonline/static/icons/icon-180x180.png +0 -0
- createsonline/static/icons/icon-180x180.webp +0 -0
- createsonline/static/icons/icon-192x192.png +0 -0
- createsonline/static/icons/icon-192x192.webp +0 -0
- createsonline/static/icons/icon-256x256.png +0 -0
- createsonline/static/icons/icon-256x256.webp +0 -0
- createsonline/static/icons/icon-32x32.png +0 -0
- createsonline/static/icons/icon-32x32.webp +0 -0
- createsonline/static/icons/icon-384x384.png +0 -0
- createsonline/static/icons/icon-384x384.webp +0 -0
- createsonline/static/icons/icon-48x48.png +0 -0
- createsonline/static/icons/icon-48x48.webp +0 -0
- createsonline/static/icons/icon-512x512.png +0 -0
- createsonline/static/icons/icon-512x512.webp +0 -0
- createsonline/static/icons/icon-64x64.png +0 -0
- createsonline/static/icons/icon-64x64.webp +0 -0
- createsonline/static/image/android-chrome-192x192.png +0 -0
- createsonline/static/image/android-chrome-512x512.png +0 -0
- createsonline/static/image/apple-touch-icon.png +0 -0
- createsonline/static/image/favicon-16x16.png +0 -0
- createsonline/static/image/favicon-32x32.png +0 -0
- createsonline/static/image/favicon.ico +0 -0
- createsonline/static/image/favicon.svg +17 -0
- createsonline/static/image/icon-128x128.png +0 -0
- createsonline/static/image/icon-128x128.webp +0 -0
- createsonline/static/image/icon-16x16.png +0 -0
- createsonline/static/image/icon-16x16.webp +0 -0
- createsonline/static/image/icon-180x180.png +0 -0
- createsonline/static/image/icon-180x180.webp +0 -0
- createsonline/static/image/icon-192x192.png +0 -0
- createsonline/static/image/icon-192x192.webp +0 -0
- createsonline/static/image/icon-256x256.png +0 -0
- createsonline/static/image/icon-256x256.webp +0 -0
- createsonline/static/image/icon-32x32.png +0 -0
- createsonline/static/image/icon-32x32.webp +0 -0
- createsonline/static/image/icon-384x384.png +0 -0
- createsonline/static/image/icon-384x384.webp +0 -0
- createsonline/static/image/icon-48x48.png +0 -0
- createsonline/static/image/icon-48x48.webp +0 -0
- createsonline/static/image/icon-512x512.png +0 -0
- createsonline/static/image/icon-512x512.webp +0 -0
- createsonline/static/image/icon-64x64.png +0 -0
- createsonline/static/image/icon-64x64.webp +0 -0
- createsonline/static/image/logo-header-h100.png +0 -0
- createsonline/static/image/logo-header-h100.webp +0 -0
- createsonline/static/image/logo-header-h200@2x.png +0 -0
- createsonline/static/image/logo-header-h200@2x.webp +0 -0
- createsonline/static/image/logo.png +0 -0
- createsonline/static/js/admin.js +274 -0
- createsonline/static/site.webmanifest +35 -0
- createsonline/static/templates/admin/base.html +87 -0
- createsonline/static/templates/admin/dashboard.html +217 -0
- createsonline/static/templates/admin/model_form.html +270 -0
- createsonline/static/templates/admin/model_list.html +202 -0
- createsonline/static/test_script.js +15 -0
- createsonline/static/test_styles.css +59 -0
- createsonline/static_files.py +365 -0
- createsonline/templates/404.html +100 -0
- createsonline/templates/admin_login.html +169 -0
- createsonline/templates/base.html +102 -0
- createsonline/templates/index.html +151 -0
- createsonline/templates.py +205 -0
- createsonline/testing.py +322 -0
- createsonline/utils.py +448 -0
- createsonline/validation/__init__.py +49 -0
- createsonline/validation/fields.py +598 -0
- createsonline/validation/models.py +504 -0
- createsonline/validation/validators.py +561 -0
- createsonline/views.py +184 -0
- createsonline-0.1.26.dist-info/METADATA +46 -0
- createsonline-0.1.26.dist-info/RECORD +152 -0
- createsonline-0.1.26.dist-info/WHEEL +5 -0
- createsonline-0.1.26.dist-info/entry_points.txt +2 -0
- createsonline-0.1.26.dist-info/licenses/LICENSE +21 -0
- createsonline-0.1.26.dist-info/top_level.txt +1 -0
@@ -0,0 +1,1244 @@
# createsonline/ai/services.py
"""
CREATESONLINE AI Services - COMPLETE ENHANCED VERSION

AI service implementations for OpenAI, Anthropic, local ML models,
and vector operations. Provides unified interface with internal fallback.
"""
import json
import os
import hashlib
import math
import random
import time
from typing import Dict, Any, List, Optional
from abc import ABC, abstractmethod
from datetime import datetime

# Internal imports
try:
    from ..http.client import HTTPClient, AsyncHTTPClient
    INTERNAL_HTTP_AVAILABLE = True
except ImportError:
    INTERNAL_HTTP_AVAILABLE = False

try:
    import numpy as np
    NUMPY_AVAILABLE = True
except ImportError:
    NUMPY_AVAILABLE = False

try:
    from ..data.dataframe import CreatesonlineDataFrame
    from ..data.series import CreatesonlineSeries
    INTERNAL_DATA_AVAILABLE = True
except ImportError:
    INTERNAL_DATA_AVAILABLE = False

# ========================================
# BASE AI SERVICE INTERFACE
# ========================================

class BaseAIService(ABC):
    """Base class for AI services with caching and configuration"""

    def __init__(self, config: Dict[str, Any]):
        """Initialize AI service with configuration"""
        self.config = config
        self.timeout = config.get("timeout", 30)
        self.enable_caching = config.get("enable_caching", True)
        self.cache_ttl = config.get("cache_ttl", 3600)
        self._cache = {}
        self._stats = {
            "requests": 0,
            "cache_hits": 0,
            "cache_misses": 0,
            "errors": 0,
            "total_tokens": 0,
            "avg_response_time": 0.0
        }

    def _get_cache_key(self, operation: str, **kwargs) -> str:
        """Generate cache key for operation"""
        # Remove sensitive data from cache key
        clean_kwargs = {k: v for k, v in kwargs.items() if 'key' not in k.lower() and 'token' not in k.lower()}
        cache_data = {
            "operation": operation,
            "params": clean_kwargs,
            "service": self.__class__.__name__
        }
        cache_str = json.dumps(cache_data, sort_keys=True, default=str)
        return hashlib.md5(cache_str.encode()).hexdigest()

    def _get_cached_result(self, cache_key: str) -> Optional[Any]:
        """Get cached result if available and valid"""
        if not self.enable_caching or cache_key not in self._cache:
            self._stats["cache_misses"] += 1
            return None

        cached_item = self._cache[cache_key]

        if (datetime.utcnow().timestamp() - cached_item["timestamp"]) < self.cache_ttl:
            self._stats["cache_hits"] += 1
            return cached_item["result"]
        else:
            # Remove expired cache entry
            del self._cache[cache_key]
            self._stats["cache_misses"] += 1
            return None

    def _set_cached_result(self, cache_key: str, result: Any):
        """Cache result with timestamp"""
        if self.enable_caching:
            self._cache[cache_key] = {
                "result": result,
                "timestamp": datetime.utcnow().timestamp()
            }

    def _update_stats(self, operation: str, response_time: float = 0.0, tokens: int = 0, error: bool = False):
        """Update service statistics"""
        self._stats["requests"] += 1
        if error:
            self._stats["errors"] += 1
        if tokens:
            self._stats["total_tokens"] += tokens
        if response_time:
            # Update average response time
            current_avg = self._stats["avg_response_time"]
            total_requests = self._stats["requests"]
            self._stats["avg_response_time"] = (current_avg * (total_requests - 1) + response_time) / total_requests

    def get_stats(self) -> Dict[str, Any]:
        """Get service statistics"""
        return {
            **self._stats,
            "cache_size": len(self._cache),
            "cache_hit_rate": self._stats["cache_hits"] / max(1, self._stats["cache_hits"] + self._stats["cache_misses"]),
            "error_rate": self._stats["errors"] / max(1, self._stats["requests"]),
            "uptime": "operational"
        }

    def clear_cache(self):
        """Clear service cache"""
        self._cache.clear()

    @abstractmethod
    async def generate_text(self, prompt: str, **kwargs) -> str:
        """Generate text using AI service"""
        pass

    @abstractmethod
    async def get_embedding(self, text: str, **kwargs) -> List[float]:
        """Get text embedding"""
        pass
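
# Illustrative sketch (not part of the published module): how the caching
# helpers above interact. Cache keys hash a sorted JSON dump of the call
# parameters (minus anything whose name contains "key" or "token"), so
# repeated calls with identical parameters share one cache slot until
# cache_ttl seconds elapse. "EchoService" is a hypothetical subclass.
#
#     class EchoService(BaseAIService):
#         async def generate_text(self, prompt: str, **kwargs) -> str:
#             key = self._get_cache_key("generate_text", prompt=prompt)
#             cached = self._get_cached_result(key)
#             if cached is not None:
#                 return cached
#             result = prompt.upper()
#             self._set_cached_result(key, result)
#             return result
#
#         async def get_embedding(self, text: str, **kwargs) -> List[float]:
#             return [0.0]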

# ========================================
# ENHANCED INTERNAL AI ENGINE
# ========================================

class EnhancedInternalAIEngine:
    """Enhanced pure Python AI engine with better algorithms"""

    def __init__(self):
        self.cache = {}
        self.models = {}
        self.vocabulary = set()
        self.patterns = {
            'positive': ['good', 'great', 'excellent', 'amazing', 'wonderful', 'fantastic', 'awesome', 'brilliant', 'perfect', 'outstanding'],
            'negative': ['bad', 'terrible', 'awful', 'horrible', 'disappointing', 'poor', 'worst', 'pathetic', 'disgusting', 'dreadful'],
            'technical': ['api', 'framework', 'algorithm', 'database', 'server', 'client', 'protocol', 'interface', 'implementation'],
            'business': ['revenue', 'profit', 'customer', 'market', 'sales', 'growth', 'strategy', 'roi', 'conversion', 'acquisition']
        }
        self._build_vocabulary()

    def _build_vocabulary(self):
        """Build internal vocabulary from patterns"""
        for words in self.patterns.values():
            self.vocabulary.update(words)

    def hash_text(self, text: str) -> str:
        """Generate consistent hash for text"""
        return hashlib.md5(text.encode()).hexdigest()

    def generate_embedding(self, text: str, dimensions: int = 768) -> List[float]:
        """Generate enhanced embedding from text using a TF-IDF-like approach"""
        text_lower = text.lower()
        words = [word for word in text_lower.split() if word.isalpha()]

        # Calculate word frequencies
        word_freq = {}
        for word in words:
            word_freq[word] = word_freq.get(word, 0) + 1

        # Generate embedding based on semantic patterns
        embedding = [0.0] * dimensions

        # Use hash for base randomness but add semantic meaning
        hash_val = self.hash_text(text)

        for i in range(dimensions):
            # Base value from hash
            seed_char = hash_val[i % len(hash_val)]
            base_value = ord(seed_char) / 255.0

            # Add semantic components
            semantic_boost = 0.0

            # Check for pattern matches
            for category, pattern_words in self.patterns.items():
                category_score = sum(1 for word in words if word in pattern_words) / max(1, len(words))
                if category_score > 0:
                    # Add category-specific components to certain dimensions
                    if i % 4 == hash(category) % 4:
                        semantic_boost += category_score * 0.3

            # Calculate TF-IDF-like score for dimension
            if i < len(words):
                word = words[i % len(words)]
                tf = word_freq.get(word, 0) / len(words)
                # Simple IDF approximation
                idf = math.log(1000 / (10 + sum(1 for w in self.vocabulary if w == word)))
                tfidf_component = tf * idf * 0.2
            else:
                tfidf_component = 0.0

            # Combine components
            final_value = (base_value * 0.5 + semantic_boost + tfidf_component) - 0.5
            embedding[i] = max(-1.0, min(1.0, final_value))  # Clamp to [-1, 1]

        return embedding

    def similarity(self, vec1: List[float], vec2: List[float]) -> float:
        """Enhanced cosine similarity calculation"""
        if not vec1 or not vec2 or len(vec1) != len(vec2):
            return 0.0

        try:
            if NUMPY_AVAILABLE:
                v1 = np.array(vec1)
                v2 = np.array(vec2)
                norm1 = float(np.linalg.norm(v1))
                norm2 = float(np.linalg.norm(v2))

                if norm1 == 0 or norm2 == 0:
                    return 0.0

                return float(np.dot(v1, v2) / (norm1 * norm2))
            else:
                # Manual calculation
                dot_product = sum(a * b for a, b in zip(vec1, vec2))
                norm1 = math.sqrt(sum(a * a for a in vec1))
                norm2 = math.sqrt(sum(b * b for b in vec2))

                if norm1 == 0 or norm2 == 0:
                    return 0.0

                return dot_product / (norm1 * norm2)
        except Exception:
            return 0.0

    def generate_text(self, prompt: str, max_tokens: int = 100) -> str:
        """Enhanced text generation with better context understanding"""
        prompt_lower = prompt.lower()
        words = prompt_lower.split()

        # Analyze prompt intent
        intent = self._analyze_intent(prompt_lower, words)

        # Generate response based on intent
        if intent == 'summary':
            return self._generate_summary(prompt, max_tokens)
        elif intent == 'question':
            return self._generate_answer(prompt, max_tokens)
        elif intent == 'creative':
            return self._generate_creative(prompt, max_tokens)
        elif intent == 'technical':
            return self._generate_technical(prompt, max_tokens)
        else:
            return self._generate_general(prompt, max_tokens)

    def _analyze_intent(self, prompt_lower: str, words: List[str]) -> str:
        """Analyze prompt intent for better generation"""
        if any(word in prompt_lower for word in ['summarize', 'summary', 'sum up', 'brief']):
            return 'summary'
        elif any(word in prompt_lower for word in ['what', 'how', 'why', 'when', 'where', 'who', '?']):
            return 'question'
        elif any(word in prompt_lower for word in ['write', 'create', 'story', 'poem', 'creative']):
            return 'creative'
        elif any(word in prompt_lower for word in ['api', 'code', 'function', 'algorithm', 'technical', 'implement']):
            return 'technical'
        else:
            return 'general'

    def _generate_summary(self, prompt: str, max_tokens: int) -> str:
        """Generate summary-style response"""
        key_phrases = self._extract_key_phrases(prompt)
        return f"Summary: Key points include {', '.join(key_phrases[:3])}. {prompt[:100]}... (Generated by CREATESONLINE AI)"

    def _generate_answer(self, prompt: str, max_tokens: int) -> str:
        """Generate answer-style response"""
        prompt_lower = prompt.lower()
        if 'what is' in prompt_lower:
            # Guard against prompts that end right after "what is"
            remainder = prompt_lower.split('what is', 1)[1].strip().split()
            subject = remainder[0] if remainder else 'this'
            return f"{subject.title()} is a concept/entity that relates to the context you've provided. Based on the CREATESONLINE AI analysis, this appears to be significant in your domain."
        elif 'how to' in prompt_lower:
            return "To accomplish this task: 1) Analyze the requirements, 2) Plan the approach, 3) Implement systematically. CREATESONLINE recommends breaking down complex tasks into manageable steps."
        else:
            return "Based on your question, the CREATESONLINE AI suggests considering multiple factors and approaches. The context indicates this is an important query that requires thoughtful analysis."

    def _generate_creative(self, prompt: str, max_tokens: int) -> str:
        """Generate creative content"""
        themes = self._extract_themes(prompt)
        return f"Creative Response: Inspired by {', '.join(themes)}, this creates an engaging narrative that captures the essence of your request. The CREATESONLINE AI weaves together elements to form a compelling piece."

    def _generate_technical(self, prompt: str, max_tokens: int) -> str:
        """Generate technical response"""
        tech_terms = [word for word in prompt.split() if word.lower() in self.patterns['technical']]
        return f"Technical Analysis: Regarding {', '.join(tech_terms) or 'your request'}, the CREATESONLINE framework recommends implementing best practices with consideration for scalability, maintainability, and performance optimization."

    def _generate_general(self, prompt: str, max_tokens: int) -> str:
        """Generate general response"""
        return f"AI Response: Based on your input '{prompt[:50]}...', the CREATESONLINE AI provides contextually relevant information and insights tailored to your specific needs and requirements."

    def _extract_key_phrases(self, text: str) -> List[str]:
        """Extract key phrases from text"""
        words = [word for word in text.lower().split() if word.isalpha() and len(word) > 3]
        # Simple frequency-based extraction
        word_freq = {}
        for word in words:
            word_freq[word] = word_freq.get(word, 0) + 1

        return sorted(word_freq.keys(), key=lambda x: word_freq[x], reverse=True)[:5]

    def _extract_themes(self, text: str) -> List[str]:
        """Extract themes from text"""
        themes = []
        for category, pattern_words in self.patterns.items():
            if any(word in text.lower() for word in pattern_words):
                themes.append(category)
        return themes or ['general']

    def classify_text(self, text: str, categories: Optional[List[str]] = None) -> Dict[str, float]:
        """Enhanced text classification"""
        if not categories:
            categories = list(self.patterns.keys()) + ['neutral']

        text_lower = text.lower()
        words = text_lower.split()
        scores = {}

        for category in categories:
            if category in self.patterns:
                # Pattern-based scoring
                pattern_words = self.patterns[category]
                matches = sum(1 for word in words if word in pattern_words)
                scores[category] = min(1.0, matches / max(1, len(words)) * 2)
            elif category == 'neutral':
                # Neutral score is inverse of other categories
                other_scores = [scores.get(cat, 0) for cat in self.patterns.keys()]
                scores[category] = max(0.1, 1.0 - max(other_scores, default=0))
            else:
                scores[category] = 0.1  # Default low score

        # Normalize scores
        total = sum(scores.values())
        if total > 0:
            scores = {k: v / total for k, v in scores.items()}

        return scores

    def predict_numeric(self, features: Dict[str, Any]) -> float:
        """Enhanced numeric prediction with feature engineering"""
        feature_sum = 0.0
        feature_count = 0.0

        for key, value in features.items():
            # Check bool before int/float: bool is a subclass of int, so the
            # numeric branch would otherwise swallow boolean features
            if isinstance(value, bool):
                feature_sum += 1.0 if value else 0.0
                feature_count += 1
            elif isinstance(value, (int, float)):
                # Apply feature-specific weights
                weight = 1.0
                if 'score' in key.lower() or 'rating' in key.lower():
                    weight = 1.5
                elif 'count' in key.lower() or 'number' in key.lower():
                    weight = 0.8
                elif 'time' in key.lower() or 'duration' in key.lower():
                    weight = 0.6

                feature_sum += value * weight
                feature_count += 1
            elif isinstance(value, str):
                # Text features
                sentiment_scores = self.classify_text(value, ['positive', 'negative'])
                feature_sum += sentiment_scores.get('positive', 0) * 0.3
                feature_count += 0.3

        if feature_count == 0:
            return random.random()

        # Normalize and add some intelligent variation
        base_score = feature_sum / feature_count

        # Add deterministic but varied component based on feature hash
        feature_hash = self.hash_text(str(sorted(features.items())))
        hash_component = int(feature_hash[:8], 16) % 100 / 100.0

        # Combine with sigmoid function for better distribution
        final_score = 1 / (1 + math.exp(-(base_score - 0.5) * 3))
        final_score = (final_score * 0.7) + (hash_component * 0.3)

        return max(0.0, min(1.0, final_score))
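
# Illustrative sketch (not part of the published module): the engine's
# embeddings are deterministic for a given input string, so two related
# texts can be compared with cosine similarity and no external model.
#
#     engine = EnhancedInternalAIEngine()
#     a = engine.generate_embedding("great api framework", dimensions=64)
#     b = engine.generate_embedding("excellent api design", dimensions=64)
#     score = engine.similarity(a, b)  # cosine similarity in [-1.0, 1.0]
#     labels = engine.classify_text("this framework is excellent")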

# Global enhanced AI engine
_enhanced_ai_engine = EnhancedInternalAIEngine()

# ========================================
# OPENAI SERVICE (ENHANCED)
# ========================================

class OpenAIService(BaseAIService):
    """Enhanced OpenAI API service with better error handling and features"""

    def __init__(self, config: Dict[str, Any]):
        """Initialize OpenAI service with enhanced configuration"""
        super().__init__(config)
        self.api_key = config.get("openai_api_key") or os.getenv("OPENAI_API_KEY")
        self.base_url = config.get("base_url", "https://api.openai.com/v1")
        self.organization = config.get("organization") or os.getenv("OPENAI_ORG_ID")

        # Enhanced fallback mode
        if not self.api_key:
            self.api_key = "test-key-for-development"
            self._test_mode = True
        else:
            self._test_mode = False

        self.headers = {
            "Authorization": f"Bearer {self.api_key}",
            "Content-Type": "application/json"
        }

        if self.organization:
            self.headers["OpenAI-Organization"] = self.organization

        # Model configurations
        self.model_configs = {
            "gpt-4": {"max_tokens": 8192, "cost_per_token": 0.00003},
            "gpt-3.5-turbo": {"max_tokens": 4096, "cost_per_token": 0.000002},
            "text-embedding-ada-002": {"dimensions": 1536, "cost_per_token": 0.0000001},
            "text-embedding-3-small": {"dimensions": 1536, "cost_per_token": 0.00000002},
            "text-embedding-3-large": {"dimensions": 3072, "cost_per_token": 0.00000013},
        }

    async def generate_text(
        self,
        prompt: str,
        model: str = None,
        max_tokens: int = None,
        temperature: float = None,
        system_prompt: str = None,
        **kwargs
    ) -> str:
        """Enhanced OpenAI text generation"""

        start_time = time.time()
        model = model or self.config.get("default_llm_model", "gpt-3.5-turbo")
        max_tokens = max_tokens or self.config.get("max_tokens", 500)
        temperature = temperature or self.config.get("temperature", 0.7)

        # Enhanced test mode
        if self._test_mode or not INTERNAL_HTTP_AVAILABLE:
            response_time = time.time() - start_time
            result = f"OpenAI Response (CREATESONLINE): {_enhanced_ai_engine.generate_text(prompt, max_tokens)}"
            self._update_stats("generate_text", response_time, len(result.split()))
            return result

        # Check cache
        cache_key = self._get_cache_key(
            "generate_text",
            prompt=prompt,
            model=model,
            max_tokens=max_tokens,
            temperature=temperature,
            system_prompt=system_prompt
        )

        cached_result = self._get_cached_result(cache_key)
        if cached_result is not None:
            return cached_result

        # Prepare messages
        messages = [{"role": "user", "content": prompt}]
        if system_prompt:
            messages.insert(0, {"role": "system", "content": system_prompt})

        request_data = {
            "model": model,
            "messages": messages,
            "max_tokens": max_tokens,
            "temperature": temperature
        }

        try:
            # Use internal HTTP client
            client = AsyncHTTPClient()
            response = await client.post(
                url=f"{self.base_url}/chat/completions",
                headers=self.headers,
                json=request_data,
                timeout=self.timeout
            )

            if response.status_code >= 400:
                raise ValueError(f"OpenAI API error: {response.status_code}")

            result = response.json()
            generated_text = result["choices"][0]["message"]["content"]

            # Track usage
            usage = result.get("usage", {})
            total_tokens = usage.get("total_tokens", 0)

            response_time = time.time() - start_time
            self._update_stats("generate_text", response_time, total_tokens)

            # Cache result
            self._set_cached_result(cache_key, generated_text)

            return generated_text

        except Exception:
            self._update_stats("generate_text", time.time() - start_time, 0, True)
            # Fallback to enhanced internal engine
            return f"OpenAI (Enhanced): {_enhanced_ai_engine.generate_text(prompt, max_tokens)}"

    async def get_embedding(self, text: str, **kwargs) -> List[float]:
        """Get a text embedding via the enhanced internal engine"""
        return _enhanced_ai_engine.generate_embedding(text, kwargs.get("dimensions", 768))

    async def analyze_sentiment(self, text: str) -> Dict[str, Any]:
        """Analyze sentiment using the API or enhanced internal engine"""
        if self._test_mode:
            return _enhanced_ai_engine.classify_text(text, ["positive", "negative", "neutral"])

        prompt = f"Analyze the sentiment of this text and return only a JSON object with 'sentiment' (positive/negative/neutral), 'confidence' (0-1): {text}"

        try:
            response = await self.generate_text(
                prompt=prompt,
                max_tokens=100,
                temperature=0.1
            )
            # Try to parse JSON response
            return json.loads(response)
        except Exception:
            # Fallback to enhanced internal analysis
            sentiment_scores = _enhanced_ai_engine.classify_text(text, ["positive", "negative", "neutral"])
            best_sentiment = max(sentiment_scores.items(), key=lambda x: x[1])
            return {
                "sentiment": best_sentiment[0],
                "confidence": best_sentiment[1]
            }
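
# Illustrative sketch (not part of the published module): if OPENAI_API_KEY
# is unset, the service runs in test mode and answers from the internal
# engine, so the call below works without network access.
#
#     import asyncio
#
#     async def _demo():
#         svc = OpenAIService({"default_llm_model": "gpt-3.5-turbo"})
#         print(await svc.generate_text("Summarize CREATESONLINE in one line"))
#
#     asyncio.run(_demo())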

# ========================================
# ENHANCED LOCAL ML SERVICE
# ========================================

class LocalMLService(BaseAIService):
    """Enhanced local machine learning service"""

    def __init__(self, config: Dict[str, Any]):
        """Initialize enhanced local ML service"""
        super().__init__(config)
        self.models = {}
        self.model_cache_dir = config.get("model_cache_dir", "./models")
        self.vector_store = {}

        # Create model directory if it doesn't exist
        os.makedirs(self.model_cache_dir, exist_ok=True)

        # Enhanced preprocessing pipelines
        self.preprocessors = {
            "text": self._preprocess_text,
            "numeric": self._preprocess_numeric,
            "categorical": self._preprocess_categorical
        }

    async def generate_text(self, prompt: str, model: str = None, **kwargs) -> str:
        """Generate text using local models or enhanced internal engine"""
        return _enhanced_ai_engine.generate_text(prompt, kwargs.get("max_tokens", 100))

    async def get_embedding(self, text: str, model: str = None, **kwargs) -> List[float]:
        """Generate embeddings using local models"""
        dimensions = kwargs.get("dimensions", 768)
        return _enhanced_ai_engine.generate_embedding(text, dimensions)

    async def predict(
        self,
        data: Dict[str, Any],
        model_name: str,
        prediction_type: str = "classification",
        **kwargs
    ) -> Dict[str, Any]:
        """Enhanced prediction with better feature processing"""

        start_time = time.time()

        # Check cache
        cache_key = self._get_cache_key(
            "predict",
            data=data,
            model_name=model_name,
            prediction_type=prediction_type
        )

        cached_result = self._get_cached_result(cache_key)
        if cached_result is not None:
            return cached_result

        try:
            # Load or create model
            model = await self._get_or_create_model(model_name, prediction_type)

            # Enhanced feature preparation
            features = await self._prepare_enhanced_features(data)

            # Make prediction
            if prediction_type == "classification":
                prediction = await self._classify(model, features)
            elif prediction_type == "regression":
                prediction = await self._regress(model, features)
            elif prediction_type == "clustering":
                prediction = await self._cluster(model, features)
            else:
                prediction = await self._custom_predict(model, features, prediction_type)

            result = {
                "prediction": prediction["value"],
                "confidence": prediction["confidence"],
                "model": model_name,
                "prediction_type": prediction_type,
                "feature_importance": prediction.get("feature_importance", {}),
                "explanation": prediction.get("explanation", "")
            }

            response_time = time.time() - start_time
            self._update_stats("predict", response_time, len(str(data)))

            # Cache result
            self._set_cached_result(cache_key, result)

            return result

        except Exception:
            self._update_stats("predict", time.time() - start_time, 0, True)
            # Enhanced fallback prediction
            return await self._enhanced_fallback_prediction(data, prediction_type)

    async def _get_or_create_model(self, model_name: str, prediction_type: str):
        """Get existing model or create new one"""
        if model_name not in self.models:
            try:
                # Try to load from cache
                model_path = os.path.join(self.model_cache_dir, f"{model_name}.pkl")

                if os.path.exists(model_path):
                    import joblib
                    self.models[model_name] = joblib.load(model_path)
                else:
                    # Create new model
                    self.models[model_name] = await self._create_model(prediction_type)

            except ImportError:
                # Create enhanced mock model
                self.models[model_name] = {
                    "type": prediction_type,
                    "created": datetime.utcnow(),
                    "training_data": [],
                    "enhanced": True
                }

        return self.models[model_name]

    async def _create_model(self, prediction_type: str):
        """Create new ML model"""
        try:
            if prediction_type == "classification":
                from sklearn.ensemble import RandomForestClassifier
                model = RandomForestClassifier(n_estimators=100, random_state=42)
            elif prediction_type == "regression":
                from sklearn.ensemble import RandomForestRegressor
                model = RandomForestRegressor(n_estimators=100, random_state=42)
            elif prediction_type == "clustering":
                from sklearn.cluster import KMeans
                model = KMeans(n_clusters=3, random_state=42)
            else:
                # Default to classification
                from sklearn.ensemble import RandomForestClassifier
                model = RandomForestClassifier(n_estimators=100, random_state=42)

            # Train with dummy data
            import numpy as np
            X_dummy = np.random.rand(100, 10)
            if prediction_type == "clustering":
                model.fit(X_dummy)
            else:
                y_dummy = np.random.randint(0, 2, 100) if prediction_type == "classification" else np.random.rand(100)
                model.fit(X_dummy, y_dummy)

            return model

        except ImportError:
            # Return enhanced mock model
            return {
                "type": prediction_type,
                "enhanced": True,
                "weights": [random.random() for _ in range(10)]
            }

    async def _prepare_enhanced_features(self, data: Dict[str, Any]) -> List[float]:
        """Enhanced feature preparation with multiple data types"""
        features = []

        for key, value in data.items():
            # bool is checked before int/float because bool is an int subclass
            if isinstance(value, bool):
                features.append(float(value))

            elif isinstance(value, (int, float)):
                # Numeric features with normalization
                normalized_value = self._normalize_numeric(value)
                features.append(normalized_value)

            elif isinstance(value, str):
                # Text features with enhanced processing
                text_features = await self._extract_text_features(value)
                features.extend(text_features)

            elif isinstance(value, list):
                # List features - statistical summary
                if value and all(isinstance(x, (int, float)) for x in value):
                    features.extend([
                        sum(value) / len(value),  # mean
                        max(value) - min(value),  # range
                        len(value)  # count
                    ])
                else:
                    features.append(len(value))

            elif isinstance(value, dict):
                # Dict features - extract numeric values
                numeric_values = [v for v in value.values() if isinstance(v, (int, float))]
                if numeric_values:
                    features.append(sum(numeric_values) / len(numeric_values))
                else:
                    features.append(0.0)

        # Ensure we have at least 10 features (pad if necessary)
        while len(features) < 10:
            features.append(0.0)

        return features[:10]  # Limit to 10 features for consistency

    async def _extract_text_features(self, text: str) -> List[float]:
        """Extract enhanced features from text"""
        features = [
            len(text),  # Length
            len(text.split()),  # Word count
            len(set(text.split())) / max(1, len(text.split())),  # Unique word ratio
            text.count('.') + text.count('!') + text.count('?'),  # Sentence count
            sum(1 for char in text if char.isupper()) / max(1, len(text))  # Uppercase ratio
        ]

        # Sentiment features
        sentiment_scores = _enhanced_ai_engine.classify_text(text, ["positive", "negative"])
        features.extend([
            sentiment_scores.get("positive", 0),
            sentiment_scores.get("negative", 0)
        ])

        return features

    def _normalize_numeric(self, value: float) -> float:
        """Normalize numeric value"""
        # Simple min-max normalization to [0, 1]
        # In a real implementation, this would use dataset statistics
        if value < 0:
            return 0.0
        elif value > 100:
            return 1.0
        else:
            return value / 100.0

    async def _classify(self, model, features: List[float]) -> Dict[str, Any]:
        """Enhanced classification"""
        try:
            if hasattr(model, 'predict') and hasattr(model, 'predict_proba'):
                # Real scikit-learn model
                import numpy as np
                X = np.array([features])
                prediction = model.predict(X)[0]
                probabilities = model.predict_proba(X)[0]
                confidence = float(max(probabilities))

                return {
                    "value": prediction,
                    "confidence": confidence,
                    "probabilities": probabilities.tolist(),
                    "explanation": f"Classified with {confidence:.2%} confidence"
                }
            else:
                # Enhanced mock classification
                return await self._enhanced_mock_classification(features)

        except Exception:
            return await self._enhanced_mock_classification(features)

    async def _enhanced_mock_classification(self, features: List[float]) -> Dict[str, Any]:
        """Enhanced mock classification with feature analysis"""
        # Analyze features for more intelligent prediction
        feature_sum = sum(features)
        feature_mean = feature_sum / len(features)
        feature_variance = sum((x - feature_mean) ** 2 for x in features) / len(features)

        # Generate prediction based on feature analysis
        if feature_sum > 5.0:
            prediction = "high_value"
            confidence = 0.85 + min(0.1, feature_variance * 0.1)
        elif feature_sum > 2.5:
            prediction = "medium_value"
            confidence = 0.75 + min(0.1, feature_variance * 0.05)
        else:
            prediction = "low_value"
            confidence = 0.65 + min(0.15, feature_variance * 0.15)

        return {
            "value": prediction,
            "confidence": min(0.95, confidence),
            "feature_importance": {f"feature_{i}": abs(f) for i, f in enumerate(features[:5])},
            "explanation": f"Classification based on feature analysis (sum: {feature_sum:.2f})"
        }

    async def _regress(self, model, features: List[float]) -> Dict[str, Any]:
        """Enhanced regression"""
        try:
            if hasattr(model, 'predict'):
                # Real scikit-learn model
                import numpy as np
                X = np.array([features])
                prediction = float(model.predict(X)[0])

                return {
                    "value": prediction,
                    "confidence": 0.8,  # Would be calculated from model variance
                    "explanation": f"Regression prediction: {prediction:.3f}"
                }
            else:
                return await self._enhanced_mock_regression(features)

        except Exception:
            return await self._enhanced_mock_regression(features)

    async def _enhanced_mock_regression(self, features: List[float]) -> Dict[str, Any]:
        """Enhanced mock regression"""
        # Weighted combination of features
        weights = [0.3, 0.2, 0.15, 0.1, 0.08, 0.05, 0.04, 0.03, 0.03, 0.02]
        prediction = sum(f * w for f, w in zip(features, weights))

        # Add some non-linearity
        prediction = 1 / (1 + math.exp(-prediction + 2.5))  # Sigmoid

        return {
            "value": prediction,
            "confidence": 0.82,
            "feature_importance": {f"feature_{i}": w for i, w in enumerate(weights)},
            "explanation": f"Weighted regression prediction: {prediction:.3f}"
        }

    async def _cluster(self, model, features: List[float]) -> Dict[str, Any]:
        """Enhanced clustering"""
        try:
            if hasattr(model, 'predict'):
                import numpy as np
                X = np.array([features])
                cluster = int(model.predict(X)[0])

                return {
                    "value": cluster,
                    "confidence": 0.7,
                    "explanation": f"Assigned to cluster {cluster}"
                }
            else:
                return await self._enhanced_mock_clustering(features)

        except Exception:
            return await self._enhanced_mock_clustering(features)

    async def _enhanced_mock_clustering(self, features: List[float]) -> Dict[str, Any]:
        """Enhanced mock clustering"""
        # Simple distance-based clustering
        feature_sum = sum(features)

        if feature_sum < 2.0:
            cluster = 0
        elif feature_sum < 4.0:
            cluster = 1
        else:
            cluster = 2

        return {
            "value": cluster,
            "confidence": 0.75,
            "explanation": f"Distance-based clustering (sum: {feature_sum:.2f})"
        }

    async def _custom_predict(self, model, features: List[float], prediction_type: str) -> Dict[str, Any]:
        """Custom prediction for unknown types"""
        return {
            "value": _enhanced_ai_engine.predict_numeric({"features": features}),
            "confidence": 0.6,
            "explanation": f"Custom prediction for {prediction_type}"
        }

    async def _enhanced_fallback_prediction(self, data: Dict[str, Any], prediction_type: str) -> Dict[str, Any]:
        """Enhanced fallback prediction when models fail"""
        if prediction_type == "classification":
            return {
                "prediction": "unknown",
                "confidence": 0.5,
                "model": "fallback_classifier",
                "prediction_type": prediction_type,
                "explanation": "Fallback classification due to model unavailability"
            }
        elif prediction_type == "regression":
            value = _enhanced_ai_engine.predict_numeric(data)
            return {
                "prediction": value,
                "confidence": 0.6,
                "model": "fallback_regressor",
                "prediction_type": prediction_type,
                "explanation": f"Fallback regression prediction: {value:.3f}"
            }
        else:
            return {
                "prediction": 0,
                "confidence": 0.4,
                "model": "fallback_generic",
                "prediction_type": prediction_type,
                "explanation": "Generic fallback prediction"
            }

    def _preprocess_text(self, text: str) -> Dict[str, Any]:
        """Enhanced text preprocessing"""
        return {
            "length": len(text),
            "word_count": len(text.split()),
            "sentiment": _enhanced_ai_engine.classify_text(text),
            "keywords": _enhanced_ai_engine._extract_key_phrases(text)[:5]
        }

    def _preprocess_numeric(self, value: float) -> Dict[str, Any]:
        """Enhanced numeric preprocessing"""
        return {
            "value": value,
            "normalized": self._normalize_numeric(value),
            "log_value": math.log(max(0.01, abs(value))),
            "category": "high" if value > 10 else "medium" if value > 1 else "low"
        }

    def _preprocess_categorical(self, value: str) -> Dict[str, Any]:
        """Enhanced categorical preprocessing"""
        return {
            "value": value,
            "hash": abs(hash(value)) % 1000,
            "length": len(value),
            "category_type": "text"
        }
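
# Illustrative sketch (not part of the published module): predict() accepts a
# plain dict, flattens it to a fixed 10-value feature vector, and falls back
# to the mock predictors when scikit-learn is not installed.
#
#     import asyncio
#
#     async def _demo():
#         ml = LocalMLService({})
#         result = await ml.predict(
#             {"rating": 4.5, "review": "great product", "verified": True},
#             model_name="churn",                # hypothetical model name
#             prediction_type="classification",
#         )
#         print(result["prediction"], result["confidence"])
#
#     asyncio.run(_demo())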

# ========================================
# ENHANCED VECTOR SERVICE
# ========================================

class VectorService(BaseAIService):
    """Enhanced vector operations and similarity search service"""

    def __init__(self, config: Dict[str, Any]):
        """Initialize enhanced vector service"""
        super().__init__(config)
        self.vector_store = {}  # In-memory vector store
        self.indices = {}  # Vector indices for faster search
        self.index_name = "default"
        self.distance_metrics = {
            "cosine": self._cosine_similarity,
            "euclidean": self._euclidean_distance,
            "dot_product": self._dot_product,
            "manhattan": self._manhattan_distance
        }

    async def generate_text(self, prompt: str, **kwargs) -> str:
        """Vector service doesn't generate text; fall back to internal engine"""
        return _enhanced_ai_engine.generate_text(prompt, kwargs.get("max_tokens", 100))

    async def get_embedding(self, text: str, **kwargs) -> List[float]:
        """Generate embedding for vector operations"""
        dimensions = kwargs.get("dimensions", 768)
        return _enhanced_ai_engine.generate_embedding(text, dimensions)

    async def similarity_search(
        self,
        query_vector: List[float],
        documents: List[Dict[str, Any]] = None,
        top_k: int = 5,
        threshold: float = 0.8,
        metric: str = "cosine",
        **kwargs
    ) -> List[Dict[str, Any]]:
        """Enhanced similarity search with multiple metrics and optimization"""

        start_time = time.time()

        if documents is None:
            documents = self.vector_store.get(self.index_name, [])

        if not documents:
            return []

        try:
            results = []
            distance_func = self.distance_metrics.get(metric, self._cosine_similarity)

            for i, doc in enumerate(documents):
                if "embedding" not in doc:
                    # Generate embedding if missing
                    text = doc.get("text", doc.get("content", str(doc)))
                    doc["embedding"] = await self.get_embedding(text)

                # Calculate similarity/distance
                if metric in ("euclidean", "manhattan"):
                    distance = distance_func(query_vector, doc["embedding"])
                    similarity = 1 / (1 + distance)  # Convert distance to similarity
                else:
                    distance = None
                    similarity = distance_func(query_vector, doc["embedding"])

                if similarity >= threshold:
                    results.append({
                        "document": doc,
                        "similarity": similarity,
                        "score": similarity,
                        "index": i,
                        "distance": distance if distance is not None else 1 - similarity
                    })

            # Sort by similarity (descending)
            results.sort(key=lambda x: x["similarity"], reverse=True)

            # Return top_k results
            final_results = results[:top_k]

            response_time = time.time() - start_time
            self._update_stats("similarity_search", response_time, len(documents))

            return final_results

        except Exception:
            self._update_stats("similarity_search", time.time() - start_time, 0, True)
            return []

    def _cosine_similarity(self, vec1: List[float], vec2: List[float]) -> float:
        """Enhanced cosine similarity calculation"""
        if not vec1 or not vec2 or len(vec1) != len(vec2):
            return 0.0

        try:
            if NUMPY_AVAILABLE:
                v1 = np.array(vec1)
                v2 = np.array(vec2)

                dot_product = np.dot(v1, v2)
                norm_v1 = np.linalg.norm(v1)
                norm_v2 = np.linalg.norm(v2)

                if norm_v1 == 0 or norm_v2 == 0:
                    return 0.0

                return float(dot_product / (norm_v1 * norm_v2))

            else:
                # Manual calculation
                dot_product = sum(a * b for a, b in zip(vec1, vec2))
                norm_v1 = sum(a * a for a in vec1) ** 0.5
                norm_v2 = sum(b * b for b in vec2) ** 0.5

                if norm_v1 == 0 or norm_v2 == 0:
                    return 0.0

                return dot_product / (norm_v1 * norm_v2)

        except Exception:
            return 0.0

    def _euclidean_distance(self, vec1: List[float], vec2: List[float]) -> float:
        """Calculate Euclidean distance"""
        if not vec1 or not vec2 or len(vec1) != len(vec2):
            return float('inf')

        try:
            if NUMPY_AVAILABLE:
                v1 = np.array(vec1)
                v2 = np.array(vec2)
                return float(np.linalg.norm(v1 - v2))
            else:
                return sum((a - b) ** 2 for a, b in zip(vec1, vec2)) ** 0.5
        except Exception:
            return float('inf')

    def _dot_product(self, vec1: List[float], vec2: List[float]) -> float:
        """Calculate dot product"""
        if not vec1 or not vec2 or len(vec1) != len(vec2):
            return 0.0

        try:
            if NUMPY_AVAILABLE:
                return float(np.dot(vec1, vec2))
            else:
                return sum(a * b for a, b in zip(vec1, vec2))
        except Exception:
            return 0.0

    def _manhattan_distance(self, vec1: List[float], vec2: List[float]) -> float:
        """Calculate Manhattan distance"""
        if not vec1 or not vec2 or len(vec1) != len(vec2):
            return float('inf')

        try:
            return sum(abs(a - b) for a, b in zip(vec1, vec2))
        except Exception:
            return float('inf')

    async def add_documents(
        self,
        documents: List[Dict[str, Any]],
        index_name: str = None,
        auto_embed: bool = True,
        **kwargs
    ):
        """Enhanced document addition with automatic embedding"""
        index_name = index_name or self.index_name

        if index_name not in self.vector_store:
            self.vector_store[index_name] = []

        processed_docs = []

        for doc in documents:
            if auto_embed and "embedding" not in doc:
                # Generate embedding for document
                text = doc.get("text", doc.get("content", str(doc)))
                doc["embedding"] = await self.get_embedding(text)
                doc["embedding_model"] = "enhanced_internal"
                doc["embedding_timestamp"] = datetime.utcnow().isoformat()

            # Add metadata; count already-processed docs so ids within a
            # batch stay unique
            doc["added_at"] = datetime.utcnow().isoformat()
            doc["index"] = index_name
            doc["id"] = doc.get("id", f"doc_{len(self.vector_store[index_name]) + len(processed_docs)}")

            processed_docs.append(doc)

        self.vector_store[index_name].extend(processed_docs)

        return {
            "added": len(processed_docs),
            "total": len(self.vector_store[index_name]),
            "index": index_name
        }

    async def create_index(
        self,
        index_name: str,
        dimensions: int,
        metric: str = "cosine",
        **kwargs
    ):
        """Create a new vector index with optimization"""
        self.vector_store[index_name] = []
        self.indices[index_name] = {
            "dimensions": dimensions,
            "metric": metric,
            "created_at": datetime.utcnow().isoformat(),
            "document_count": 0
        }

        return {
            "status": "created",
            "index": index_name,
            "dimensions": dimensions,
            "metric": metric
        }

    async def get_index_stats(self, index_name: str = None) -> Dict[str, Any]:
        """Get statistics for vector index"""
        index_name = index_name or self.index_name

        documents = self.vector_store.get(index_name, [])

        return {
            "index_name": index_name,
            "document_count": len(documents),
            "has_embeddings": sum(1 for doc in documents if "embedding" in doc),
            "average_vector_length": (
                sum(len(doc.get("embedding", [])) for doc in documents) / max(1, len(documents))
                if documents else 0
            ),
            "index_size_mb": len(str(documents)) / (1024 * 1024),  # Rough estimate
            "last_updated": max(
                (doc.get("added_at", "") for doc in documents),
                default="never"
            )
        }

    def clear_index(self, index_name: str = None):
        """Clear vector index"""
        index_name = index_name or self.index_name

        if index_name in self.vector_store:
            del self.vector_store[index_name]

        if index_name in self.indices:
            del self.indices[index_name]
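
# Illustrative sketch (not part of the published module): documents without
# an "embedding" key are embedded automatically on insertion, and searches
# return at most top_k matches at or above the similarity threshold.
#
#     import asyncio
#
#     async def _demo():
#         vs = VectorService({})
#         await vs.add_documents([{"text": "vector databases"},
#                                 {"text": "cooking recipes"}])
#         query = await vs.get_embedding("database indexing")
#         hits = await vs.similarity_search(query, top_k=2, threshold=0.1)
#         print([h["similarity"] for h in hits])
#
#     asyncio.run(_demo())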

# ========================================
# ANTHROPIC SERVICE (ENHANCED)
# ========================================

class AnthropicService(BaseAIService):
    """Enhanced Anthropic Claude API service"""

    def __init__(self, config: Dict[str, Any]):
        """Initialize Anthropic service"""
        super().__init__(config)
        self.api_key = config.get("anthropic_api_key") or os.getenv("ANTHROPIC_API_KEY")
        self.base_url = "https://api.anthropic.com/v1"

        if not self.api_key:
            raise ValueError("Anthropic API key is required")

        self.headers = {
            "x-api-key": self.api_key,
            "Content-Type": "application/json",
            "anthropic-version": "2023-06-01"
        }

        # Model configurations
        self.model_configs = {
            "claude-3-opus-20240229": {"max_tokens": 4096, "cost_per_token": 0.000015},
            "claude-3-sonnet-20240229": {"max_tokens": 4096, "cost_per_token": 0.000003},
            "claude-3-haiku-20240307": {"max_tokens": 4096, "cost_per_token": 0.00000025},
            "claude-instant-1.2": {"max_tokens": 8192, "cost_per_token": 0.0000008},
        }

    async def generate_text(
        self,
        prompt: str,
        model: str = None,
        max_tokens: int = None,
        temperature: float = None,
        system_prompt: str = None,
        **kwargs
    ) -> str:
        """Enhanced Claude text generation"""
        start_time = time.time()
        model = model or self.config.get("default_llm_model", "claude-3-sonnet-20240229")
        max_tokens = max_tokens or self.config.get("max_tokens", 500)
        temperature = temperature or self.config.get("temperature", 0.7)

        if not INTERNAL_HTTP_AVAILABLE:
            result = f"Claude Response (CREATESONLINE): {_enhanced_ai_engine.generate_text(prompt, max_tokens)}"
            self._update_stats("generate_text", time.time() - start_time, len(result.split()))
            return result

        request_data = {
            "model": model,
            "messages": [{"role": "user", "content": prompt}],
            "max_tokens": max_tokens,
            "temperature": temperature
        }

        if system_prompt:
            request_data["system"] = system_prompt

        try:
            client = AsyncHTTPClient()
            response = await client.post(
                url=f"{self.base_url}/messages",
                headers=self.headers,
                json=request_data,
                timeout=self.timeout
            )

            if response.status_code >= 400:
                raise ValueError(f"Anthropic API error: {response.status_code}")

            result = response.json()
            generated_text = result["content"][0]["text"]

            # Track usage
            usage = result.get("usage", {})
            total_tokens = usage.get("output_tokens", 0) + usage.get("input_tokens", 0)
            self._update_stats("generate_text", time.time() - start_time, total_tokens)

            return generated_text

        except Exception:
            self._update_stats("generate_text", time.time() - start_time, 0, True)
            # Fallback to enhanced internal engine
            return f"Claude (Enhanced): {_enhanced_ai_engine.generate_text(prompt, max_tokens)}"

    async def get_embedding(self, text: str, **kwargs) -> List[float]:
        """Anthropic doesn't provide embeddings, use enhanced internal engine"""
        return _enhanced_ai_engine.generate_embedding(text, kwargs.get("dimensions", 768))