@chimerai/cli 0.2.73
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +293 -0
- package/dist/cli.d.ts +7 -0
- package/dist/cli.d.ts.map +1 -0
- package/dist/cli.js +317 -0
- package/dist/commands/add.d.ts +11 -0
- package/dist/commands/add.d.ts.map +1 -0
- package/dist/commands/add.js +2126 -0
- package/dist/commands/create.d.ts +12 -0
- package/dist/commands/create.d.ts.map +1 -0
- package/dist/commands/create.js +1703 -0
- package/dist/commands/deploy.d.ts +11 -0
- package/dist/commands/deploy.d.ts.map +1 -0
- package/dist/commands/deploy.js +219 -0
- package/dist/commands/dev.d.ts +17 -0
- package/dist/commands/dev.d.ts.map +1 -0
- package/dist/commands/dev.js +206 -0
- package/dist/commands/doctor.d.ts +11 -0
- package/dist/commands/doctor.d.ts.map +1 -0
- package/dist/commands/doctor.js +728 -0
- package/dist/commands/generate.d.ts +19 -0
- package/dist/commands/generate.d.ts.map +1 -0
- package/dist/commands/generate.js +429 -0
- package/dist/commands/init.d.ts +11 -0
- package/dist/commands/init.d.ts.map +1 -0
- package/dist/commands/init.js +269 -0
- package/dist/commands/list.d.ts +12 -0
- package/dist/commands/list.d.ts.map +1 -0
- package/dist/commands/list.js +328 -0
- package/dist/commands/migrate.d.ts +14 -0
- package/dist/commands/migrate.d.ts.map +1 -0
- package/dist/commands/migrate.js +197 -0
- package/dist/commands/plugin.d.ts +10 -0
- package/dist/commands/plugin.d.ts.map +1 -0
- package/dist/commands/plugin.js +239 -0
- package/dist/commands/remove.d.ts +11 -0
- package/dist/commands/remove.d.ts.map +1 -0
- package/dist/commands/remove.js +472 -0
- package/dist/commands/secret.d.ts +12 -0
- package/dist/commands/secret.d.ts.map +1 -0
- package/dist/commands/secret.js +102 -0
- package/dist/commands/setup.d.ts +9 -0
- package/dist/commands/setup.d.ts.map +1 -0
- package/dist/commands/setup.js +788 -0
- package/dist/commands/update.d.ts +14 -0
- package/dist/commands/update.d.ts.map +1 -0
- package/dist/commands/update.js +211 -0
- package/dist/commands/use.d.ts +9 -0
- package/dist/commands/use.d.ts.map +1 -0
- package/dist/commands/use.js +51 -0
- package/dist/index.d.ts +22 -0
- package/dist/index.d.ts.map +1 -0
- package/dist/index.js +45 -0
- package/dist/license.d.ts +55 -0
- package/dist/license.d.ts.map +1 -0
- package/dist/license.js +258 -0
- package/dist/scanner.d.ts +31 -0
- package/dist/scanner.d.ts.map +1 -0
- package/dist/scanner.js +113 -0
- package/dist/schema-manager.d.ts +26 -0
- package/dist/schema-manager.d.ts.map +1 -0
- package/dist/schema-manager.js +132 -0
- package/dist/templates/admin.d.ts +49 -0
- package/dist/templates/admin.d.ts.map +1 -0
- package/dist/templates/admin.js +1358 -0
- package/dist/templates/ai-routes.d.ts +17 -0
- package/dist/templates/ai-routes.d.ts.map +1 -0
- package/dist/templates/ai-routes.js +1130 -0
- package/dist/templates/ai-service-tools.d.ts +22 -0
- package/dist/templates/ai-service-tools.d.ts.map +1 -0
- package/dist/templates/ai-service-tools.js +1424 -0
- package/dist/templates/ai-service.d.ts +66 -0
- package/dist/templates/ai-service.d.ts.map +1 -0
- package/dist/templates/ai-service.js +2202 -0
- package/dist/templates/api-routes.d.ts +108 -0
- package/dist/templates/api-routes.d.ts.map +1 -0
- package/dist/templates/api-routes.js +1219 -0
- package/dist/templates/auth.d.ts +48 -0
- package/dist/templates/auth.d.ts.map +1 -0
- package/dist/templates/auth.js +381 -0
- package/dist/templates/billing.d.ts +44 -0
- package/dist/templates/billing.d.ts.map +1 -0
- package/dist/templates/billing.js +551 -0
- package/dist/templates/chat.d.ts +63 -0
- package/dist/templates/chat.d.ts.map +1 -0
- package/dist/templates/chat.js +1979 -0
- package/dist/templates/components.d.ts +22 -0
- package/dist/templates/components.d.ts.map +1 -0
- package/dist/templates/components.js +672 -0
- package/dist/templates/config.d.ts +6 -0
- package/dist/templates/config.d.ts.map +1 -0
- package/dist/templates/config.js +86 -0
- package/dist/templates/docker.d.ts +25 -0
- package/dist/templates/docker.d.ts.map +1 -0
- package/dist/templates/docker.js +165 -0
- package/dist/templates/gdpr.d.ts +16 -0
- package/dist/templates/gdpr.d.ts.map +1 -0
- package/dist/templates/gdpr.js +259 -0
- package/dist/templates/index.d.ts +77 -0
- package/dist/templates/index.d.ts.map +1 -0
- package/dist/templates/index.js +339 -0
- package/dist/templates/layout.d.ts +67 -0
- package/dist/templates/layout.d.ts.map +1 -0
- package/dist/templates/layout.js +670 -0
- package/dist/templates/mfa.d.ts +23 -0
- package/dist/templates/mfa.d.ts.map +1 -0
- package/dist/templates/mfa.js +353 -0
- package/dist/templates/middleware.d.ts +12 -0
- package/dist/templates/middleware.d.ts.map +1 -0
- package/dist/templates/middleware.js +116 -0
- package/dist/templates/prisma.d.ts +35 -0
- package/dist/templates/prisma.d.ts.map +1 -0
- package/dist/templates/prisma.js +724 -0
- package/dist/templates/provider-routes.d.ts +21 -0
- package/dist/templates/provider-routes.d.ts.map +1 -0
- package/dist/templates/provider-routes.js +1203 -0
- package/dist/templates/rag.d.ts +48 -0
- package/dist/templates/rag.d.ts.map +1 -0
- package/dist/templates/rag.js +532 -0
- package/dist/templates/widget.d.ts +64 -0
- package/dist/templates/widget.d.ts.map +1 -0
- package/dist/templates/widget.js +1360 -0
- package/dist/utils/provider-db.d.ts +63 -0
- package/dist/utils/provider-db.d.ts.map +1 -0
- package/dist/utils/provider-db.js +300 -0
- package/dist/utils.d.ts +78 -0
- package/dist/utils.d.ts.map +1 -0
- package/dist/utils.js +330 -0
- package/package.json +60 -0
|
@@ -0,0 +1,2202 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
/**
|
|
3
|
+
* AI Service Template Generators
|
|
4
|
+
*
|
|
5
|
+
* Generates Python service files for the modular AI-Service.
|
|
6
|
+
* Python files are embedded as string literals (same pattern as other CLI templates).
|
|
7
|
+
*
|
|
8
|
+
* Modules:
|
|
9
|
+
* core — main.py, config.py, provider_client.py (always included)
|
|
10
|
+
* chat — chat_service.py, model_service.py, moderation_service.py
|
|
11
|
+
* rag — rag_service.py, vector_store.py, embedding_service.py
|
|
12
|
+
* guardrails — guardrails_service.py
|
|
13
|
+
* tools — services/tools/*.py (individually selectable)
|
|
14
|
+
*/
|
|
15
|
+
// CommonJS/ES-module interop helper: reuse an already-installed helper when one
// is present on `this`, otherwise wrap non-ES modules so `.default` always works.
var __importDefault = (this && this.__importDefault) || function (mod) {
    if (mod && mod.__esModule) {
        // Real ES module namespace: already carries its own `default`.
        return mod;
    }
    return { "default": mod };
};
|
|
18
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
19
|
+
exports.TOOL_INFO = void 0;
|
|
20
|
+
exports.readAiManifest = readAiManifest;
|
|
21
|
+
exports.writeAiManifest = writeAiManifest;
|
|
22
|
+
exports.generateProviderClient = generateProviderClient;
|
|
23
|
+
exports.generateServicesInit = generateServicesInit;
|
|
24
|
+
exports.generateChatService = generateChatService;
|
|
25
|
+
exports.generateModelService = generateModelService;
|
|
26
|
+
exports.generateModerationService = generateModerationService;
|
|
27
|
+
exports.generateEmbeddingService = generateEmbeddingService;
|
|
28
|
+
exports.generateFileExtractor = generateFileExtractor;
|
|
29
|
+
exports.generateRagService = generateRagService;
|
|
30
|
+
exports.generateVectorStore = generateVectorStore;
|
|
31
|
+
exports.generateGuardrailsService = generateGuardrailsService;
|
|
32
|
+
exports.generateAiServiceMain = generateAiServiceMain;
|
|
33
|
+
exports.generateAiServiceConfig = generateAiServiceConfig;
|
|
34
|
+
exports.generateAiServiceModels = generateAiServiceModels;
|
|
35
|
+
exports.generateAiServiceRequirements = generateAiServiceRequirements;
|
|
36
|
+
exports.generateAiServiceDockerfile = generateAiServiceDockerfile;
|
|
37
|
+
exports.generateAiServiceReadme = generateAiServiceReadme;
|
|
38
|
+
exports.generateDockerComposeAiService = generateDockerComposeAiService;
|
|
39
|
+
const fs_extra_1 = __importDefault(require("fs-extra"));
|
|
40
|
+
const path_1 = __importDefault(require("path"));
|
|
41
|
+
// ============================================================================
|
|
42
|
+
// Tool Metadata (used by add command for interactive menu)
|
|
43
|
+
// ============================================================================
|
|
44
|
+
exports.TOOL_INFO = {
|
|
45
|
+
web_tools: {
|
|
46
|
+
displayName: 'Web Search & Scraping',
|
|
47
|
+
description: 'DuckDuckGo search, web scraping',
|
|
48
|
+
dependencies: ['ddgs>=9.0.0', 'beautifulsoup4>=4.12.2', 'lxml>=5.0.0', 'markdownify>=0.12.1'],
|
|
49
|
+
},
|
|
50
|
+
document_tools: {
|
|
51
|
+
displayName: 'Document Processing',
|
|
52
|
+
description: 'PDF, DOCX, XLSX, PPTX extraction',
|
|
53
|
+
dependencies: [
|
|
54
|
+
'pdfplumber>=0.10.3',
|
|
55
|
+
'python-docx>=1.1.0',
|
|
56
|
+
'openpyxl>=3.1.2',
|
|
57
|
+
'python-pptx>=0.6.23',
|
|
58
|
+
],
|
|
59
|
+
},
|
|
60
|
+
code_tools: {
|
|
61
|
+
displayName: 'Code Execution',
|
|
62
|
+
description: 'Sandboxed Python execution',
|
|
63
|
+
dependencies: ['RestrictedPython>=7.0'],
|
|
64
|
+
},
|
|
65
|
+
nlp_tools: {
|
|
66
|
+
displayName: 'NLP Toolkit',
|
|
67
|
+
description: 'Summarize, Sentiment, Extract, Classify, Q&A',
|
|
68
|
+
dependencies: ['tiktoken>=0.5.0'],
|
|
69
|
+
},
|
|
70
|
+
vision_tools: {
|
|
71
|
+
displayName: 'Vision / Image Analysis',
|
|
72
|
+
description: 'Image description via LLM',
|
|
73
|
+
dependencies: ['pillow>=10.2.0'],
|
|
74
|
+
},
|
|
75
|
+
google_sheets_tools: {
|
|
76
|
+
displayName: 'Google Sheets Integration',
|
|
77
|
+
description: 'Read, write, append, create',
|
|
78
|
+
dependencies: ['google-api-python-client>=2.111.0', 'google-auth>=2.25.2'],
|
|
79
|
+
},
|
|
80
|
+
airtable_tools: {
|
|
81
|
+
displayName: 'Airtable Integration',
|
|
82
|
+
description: 'CRUD operations',
|
|
83
|
+
dependencies: ['pyairtable>=2.3.3'],
|
|
84
|
+
},
|
|
85
|
+
deepl_tools: {
|
|
86
|
+
displayName: 'Translation (DeepL)',
|
|
87
|
+
description: 'Text & document translation',
|
|
88
|
+
dependencies: ['deepl>=1.20.0'],
|
|
89
|
+
},
|
|
90
|
+
webhook_tools: {
|
|
91
|
+
displayName: 'Webhooks',
|
|
92
|
+
description: 'n8n, Zapier, Make.com, Slack',
|
|
93
|
+
dependencies: [], // httpx already in core
|
|
94
|
+
},
|
|
95
|
+
};
|
|
96
|
+
// ============================================================================
|
|
97
|
+
// Manifest Management
|
|
98
|
+
// ============================================================================
|
|
99
|
+
/**
 * Load the AI-service manifest stored at `<targetDir>/services/ai/.chimerai-ai`.
 *
 * @param {string} targetDir - Directory that contains the `services/ai` folder.
 * @returns {*} The parsed JSON content, or `null` when the file does not exist
 *   or cannot be read or parsed.
 */
function readAiManifest(targetDir) {
    const file = path_1.default.join(targetDir, 'services', 'ai', '.chimerai-ai');
    if (fs_extra_1.default.existsSync(file)) {
        try {
            const raw = fs_extra_1.default.readFileSync(file, 'utf-8');
            return JSON.parse(raw);
        }
        catch {
            // An unreadable or malformed manifest is reported like a missing one.
        }
    }
    return null;
}
|
|
110
|
+
/**
 * Persist the AI-service manifest to `<targetDir>/services/ai/.chimerai-ai`,
 * creating the containing directory first when needed.
 *
 * @param {string} targetDir - Directory that contains (or will contain) `services/ai`.
 * @param {*} manifest - Value serialized as 2-space-indented JSON.
 * @returns {void}
 */
function writeAiManifest(targetDir, manifest) {
    const aiDir = path_1.default.join(targetDir, 'services', 'ai', '.chimerai-ai');
    const serialized = JSON.stringify(manifest, null, 2);
    fs_extra_1.default.ensureDirSync(path_1.default.dirname(aiDir));
    fs_extra_1.default.writeFileSync(aiDir, serialized);
}
|
|
115
|
+
// ============================================================================
|
|
116
|
+
// Core: provider_client.py — 1:1 copy from monorepo
|
|
117
|
+
// ============================================================================
|
|
118
|
+
/**
 * Build the source of `provider_client.py`, the Python module that resolves
 * provider credentials from the Frontend Internal API.
 *
 * The generated client tracks freshness per cached provider, so refreshing a
 * single provider does not extend the lifetime of other cached entries or of
 * the cached provider list. Both fetch paths share one payload parser, so a
 * provider fetched on its own also carries its `models`.
 *
 * @returns {string} Complete Python source text.
 */
function generateProviderClient() {
    return `"""
Provider Client — Centralized provider resolution.
Auto-generated by ChimerAI CLI. Based on chimerai-kickstart monorepo.
"""

import time
from dataclasses import dataclass, field
from typing import Any, Dict, List, Optional
import httpx
import structlog

from config import settings

logger = structlog.get_logger()


@dataclass
class ProviderModel:
    id: str
    name: str


@dataclass
class ProviderInfo:
    id: str
    name: str
    type: str
    api_key: Optional[str] = None
    base_url: Optional[str] = None
    is_default: bool = False
    is_active: bool = True
    models: List[ProviderModel] = field(default_factory=list)


class ProviderClient:
    """
    Client that fetches provider configuration from the Frontend Internal API.

    Caches results for 5 minutes to avoid hitting the Frontend on every request.
    Falls back gracefully to environment-based keys if the Frontend API is
    unavailable (e.g. during local development without the full stack).
    """

    CACHE_TTL = 300  # 5 minutes

    def __init__(self):
        self._client: Optional[httpx.AsyncClient] = None
        self._cache: Dict[str, ProviderInfo] = {}
        # Fetch time per provider id, so refreshing one entry never makes
        # other (stale) entries look fresh.
        self._cache_times: Dict[str, float] = {}
        self._all_providers: Optional[List[ProviderInfo]] = None
        self._cache_timestamp: float = 0
        self._available: Optional[bool] = None

    @property
    def client(self) -> httpx.AsyncClient:
        if self._client is None or self._client.is_closed:
            self._client = httpx.AsyncClient(
                base_url=settings.frontend_url,
                headers={
                    "Authorization": f"Bearer {settings.internal_api_token}",
                    "Content-Type": "application/json",
                },
                timeout=10.0,
            )
        return self._client

    def _cache_valid(self) -> bool:
        return (time.time() - self._cache_timestamp) < self.CACHE_TTL

    def _entry_valid(self, provider_id: str) -> bool:
        fetched_at = self._cache_times.get(provider_id, 0)
        return (time.time() - fetched_at) < self.CACHE_TTL

    @staticmethod
    def _parse_provider(p: Dict[str, Any]) -> ProviderInfo:
        """Convert one provider payload from the Internal API into ProviderInfo."""
        return ProviderInfo(
            id=p["id"],
            name=p["name"],
            type=p.get("type", "openai"),
            api_key=p.get("apiKey") or p.get("api_key"),
            base_url=p.get("baseUrl") or p.get("base_url"),
            is_default=p.get("isDefault", False),
            is_active=p.get("isActive", True),
            models=[
                ProviderModel(id=m["id"], name=m["name"])
                for m in (p.get("models") or [])
            ],
        )

    def invalidate_cache(self):
        self._cache.clear()
        self._cache_times.clear()
        self._all_providers = None
        self._cache_timestamp = 0
        self._available = None
        logger.info("provider_cache_invalidated")

    async def is_available(self) -> bool:
        if self._available is not None and self._cache_valid():
            return self._available

        if not settings.internal_api_token:
            self._available = False
            return False

        try:
            resp = await self.client.get("/api/internal/providers")
            self._available = resp.status_code == 200
        except Exception:
            self._available = False

        return self._available

    async def get_all_providers(self) -> List[ProviderInfo]:
        if self._all_providers is not None and self._cache_valid():
            return self._all_providers

        try:
            resp = await self.client.get("/api/internal/providers")
            resp.raise_for_status()
            data = resp.json()

            now = time.time()
            providers = []
            for p in data:
                provider = self._parse_provider(p)
                providers.append(provider)
                self._cache[provider.id] = provider
                self._cache_times[provider.id] = now

            self._all_providers = providers
            self._cache_timestamp = now
            logger.info("providers_loaded", count=len(providers))
            return providers

        except Exception as e:
            logger.warning("get_providers_failed", error=str(e))
            return self._all_providers or []

    async def get_provider(self, provider_id: str) -> Optional[ProviderInfo]:
        if provider_id in self._cache and self._entry_valid(provider_id):
            return self._cache[provider_id]

        try:
            resp = await self.client.get(f"/api/internal/providers/{provider_id}")
            resp.raise_for_status()

            provider = self._parse_provider(resp.json())
            self._cache[provider_id] = provider
            # Only this entry was refreshed; the provider list keeps its own age.
            self._cache_times[provider_id] = time.time()
            return provider

        except Exception as e:
            logger.warning("get_provider_failed", provider_id=provider_id, error=str(e))
            # Best effort: serve the last known (possibly stale) entry.
            return self._cache.get(provider_id)

    async def get_default_provider(self, provider_type: Optional[str] = None) -> Optional[ProviderInfo]:
        providers = await self.get_all_providers()
        for p in providers:
            if p.is_default and p.is_active:
                if provider_type is None or p.type == provider_type:
                    return p
        active = [p for p in providers if p.is_active]
        if provider_type:
            active = [p for p in active if p.type == provider_type]
        return active[0] if active else None

    async def get_api_key(self, provider_id: str) -> Optional[str]:
        provider = await self.get_provider(provider_id)
        return provider.api_key if provider else None

    async def report_usage(
        self,
        provider_id: str,
        user_id: str,
        model: str,
        prompt_tokens: int,
        completion_tokens: int,
        endpoint: str = "/api/chat",
    ) -> None:
        try:
            await self.client.post(
                f"/api/internal/providers/{provider_id}/usage",
                json={
                    "userId": user_id,
                    "model": model,
                    "promptTokens": prompt_tokens,
                    "completionTokens": completion_tokens,
                    "endpoint": endpoint,
                },
            )
        except Exception as e:
            logger.warning("report_usage_failed", error=str(e))

    async def close(self):
        if self._client and not self._client.is_closed:
            await self._client.aclose()
            logger.info("provider_client_closed")


provider_client = ProviderClient()
`;
}
|
|
319
|
+
// ============================================================================
|
|
320
|
+
// Core: services/__init__.py
|
|
321
|
+
// ============================================================================
|
|
322
|
+
/**
 * Build the source of `services/__init__.py`: a single marker comment line.
 *
 * @returns {string} Python source text ending in a newline.
 */
function generateServicesInit() {
    const header = '# Services package — Auto-generated by ChimerAI CLI';
    return header + '\n';
}
|
|
326
|
+
// ============================================================================
|
|
327
|
+
// Chat Module: chat_service.py
|
|
328
|
+
// ============================================================================
|
|
329
|
+
/**
 * Build the source of `chat_service.py`, the LiteLLM-backed chat completion
 * service (blocking and server-sent-event streaming variants).
 *
 * In the generated streaming loop, token usage is read from every chunk
 * before chunks without choices are skipped. Usage can arrive on a trailing
 * chunk whose `choices` list is empty; skipping first would leave the totals
 * at zero and streamed usage would never be reported.
 *
 * NOTE(review): whether the upstream provider emits usage while streaming may
 * depend on request options such as `stream_options` — confirm against the
 * pinned LiteLLM version.
 *
 * @returns {string} Complete Python source text.
 */
function generateChatService() {
    return `"""Chat completion service using LiteLLM. Auto-generated by ChimerAI CLI."""

import json
import time
import uuid
from typing import AsyncIterator, Optional
import litellm
from litellm import acompletion
import structlog

from models import (
    ChatCompletionRequest,
    ChatCompletionResponse,
    ChatCompletionChoice,
    ChatCompletionChunk,
    ChatCompletionChunkChoice,
    DeltaMessage,
    Usage,
    ChatMessage,
)
from config import settings
from provider_client import provider_client

logger = structlog.get_logger()

litellm.set_verbose = False
litellm.drop_params = True


class ChatService:
    """Service for handling chat completions using LiteLLM."""

    async def _resolve_provider_kwargs(
        self,
        provider_id: Optional[str] = None,
        model: Optional[str] = None,
    ) -> dict:
        kwargs: dict = {}

        if provider_id:
            provider = await provider_client.get_provider(provider_id)
            if provider:
                if provider.api_key:
                    kwargs["api_key"] = provider.api_key
                if provider.base_url:
                    kwargs["api_base"] = provider.base_url
                return kwargs

        if await provider_client.is_available():
            default = await provider_client.get_default_provider()
            if default and default.api_key:
                kwargs["api_key"] = default.api_key
                if default.base_url:
                    kwargs["api_base"] = default.base_url
                return kwargs

        return kwargs

    async def create_completion(
        self,
        request: ChatCompletionRequest,
        provider_id: Optional[str] = None,
        user_id: Optional[str] = None,
    ) -> ChatCompletionResponse:
        logger.info("chat_completion_request", model=request.model, stream=False)

        try:
            provider_kwargs = await self._resolve_provider_kwargs(
                provider_id=provider_id, model=request.model,
            )

            messages = [
                {"role": msg.role.value, "content": msg.content}
                for msg in request.messages
            ]

            response = await acompletion(
                model=request.model,
                messages=messages,
                temperature=request.temperature,
                max_tokens=request.max_tokens,
                top_p=request.top_p,
                frequency_penalty=request.frequency_penalty,
                presence_penalty=request.presence_penalty,
                user=request.user,
                stream=False,
                **provider_kwargs,
            )

            result = ChatCompletionResponse(
                id=response.id or f"chatcmpl-{uuid.uuid4().hex[:8]}",
                object="chat.completion",
                created=int(time.time()),
                model=response.model,
                choices=[
                    ChatCompletionChoice(
                        index=choice.index,
                        message=ChatMessage(
                            role=choice.message.role,
                            content=choice.message.content or "",
                        ),
                        finish_reason=choice.finish_reason or "stop",
                    )
                    for choice in response.choices
                ],
                usage=Usage(
                    prompt_tokens=response.usage.prompt_tokens,
                    completion_tokens=response.usage.completion_tokens,
                    total_tokens=response.usage.total_tokens,
                ),
            )

            if provider_id and user_id:
                await provider_client.report_usage(
                    provider_id=provider_id,
                    user_id=user_id,
                    model=request.model,
                    prompt_tokens=response.usage.prompt_tokens,
                    completion_tokens=response.usage.completion_tokens,
                    endpoint="/api/chat",
                )

            return result

        except Exception as e:
            logger.error("chat_completion_error", error=str(e))
            raise

    async def create_streaming_completion(
        self,
        request: ChatCompletionRequest,
        provider_id: Optional[str] = None,
        user_id: Optional[str] = None,
    ) -> AsyncIterator[str]:
        logger.info("chat_completion_request", model=request.model, stream=True)

        try:
            provider_kwargs = await self._resolve_provider_kwargs(
                provider_id=provider_id, model=request.model,
            )

            messages = [
                {"role": msg.role.value, "content": msg.content}
                for msg in request.messages
            ]

            response = await acompletion(
                model=request.model,
                messages=messages,
                temperature=request.temperature,
                max_tokens=request.max_tokens,
                top_p=request.top_p,
                frequency_penalty=request.frequency_penalty,
                presence_penalty=request.presence_penalty,
                user=request.user,
                stream=True,
                **provider_kwargs,
            )

            total_prompt_tokens = 0
            total_completion_tokens = 0

            async for chunk in response:
                # Read usage first: it may arrive on a chunk that carries no
                # choices, which the check below would otherwise skip.
                usage = getattr(chunk, "usage", None)
                if usage:
                    total_prompt_tokens = getattr(usage, "prompt_tokens", 0) or 0
                    total_completion_tokens = getattr(usage, "completion_tokens", 0) or 0

                if not chunk.choices:
                    continue

                choice = chunk.choices[0]

                chunk_response = ChatCompletionChunk(
                    id=chunk.id or f"chatcmpl-{uuid.uuid4().hex[:8]}",
                    object="chat.completion.chunk",
                    created=int(time.time()),
                    model=chunk.model,
                    choices=[
                        ChatCompletionChunkChoice(
                            index=choice.index,
                            delta=DeltaMessage(
                                role=getattr(choice.delta, "role", None),
                                content=getattr(choice.delta, "content", None),
                            ),
                            finish_reason=choice.finish_reason,
                        )
                    ],
                )

                yield f"data: {chunk_response.model_dump_json()}\\n\\n"

            if provider_id and user_id and (total_prompt_tokens or total_completion_tokens):
                await provider_client.report_usage(
                    provider_id=provider_id,
                    user_id=user_id,
                    model=request.model,
                    prompt_tokens=total_prompt_tokens,
                    completion_tokens=total_completion_tokens,
                    endpoint="/api/chat/stream",
                )

            yield "data: [DONE]\\n\\n"

        except Exception as e:
            logger.error("streaming_completion_error", error=str(e))
            error_payload = json.dumps({"error": {"message": str(e), "type": "server_error"}})
            yield f"data: {error_payload}\\n\\n"
            yield "data: [DONE]\\n\\n"


chat_service = ChatService()
`;
}
|
|
543
|
+
// ============================================================================
|
|
544
|
+
// Chat Module: model_service.py
|
|
545
|
+
// ============================================================================
|
|
546
|
+
/**
 * Build the source of `model_service.py`, the cached model-listing service.
 *
 * The generated code reads a model's id from dict entries (`id` / `model_id`),
 * from plain strings, or from the `id` attribute of objects. Calling `str()`
 * on a non-dict entry would turn a dataclass such as `ProviderModel` into its
 * repr and publish that as the model id. De-duplication of the directly
 * fetched OpenAI models uses a set of already-listed ids.
 *
 * @returns {string} Complete Python source text.
 */
function generateModelService() {
    return `"""Model listing service. Auto-generated by ChimerAI CLI."""

import time
import structlog
from typing import Optional

from models import ModelsResponse, ModelInfo
from provider_client import provider_client
from config import settings

logger = structlog.get_logger()

# Fallback models when provider APIs are not reachable
FALLBACK_MODELS = [
    ModelInfo(id="gpt-4", object="model", created=0, owned_by="openai"),
    ModelInfo(id="gpt-4-turbo-preview", object="model", created=0, owned_by="openai"),
    ModelInfo(id="gpt-3.5-turbo", object="model", created=0, owned_by="openai"),
    ModelInfo(id="claude-3-opus-20240229", object="model", created=0, owned_by="anthropic"),
    ModelInfo(id="claude-3-sonnet-20240229", object="model", created=0, owned_by="anthropic"),
    ModelInfo(id="text-embedding-ada-002", object="model", created=0, owned_by="openai"),
]


class ModelService:
    """Service for listing available models with caching."""

    def __init__(self, cache_ttl: int = 300):
        self._cache: Optional[ModelsResponse] = None
        self._cache_time: float = 0
        self._cache_ttl = cache_ttl  # seconds (default 5 min)

    def _is_cache_valid(self) -> bool:
        return self._cache is not None and (time.time() - self._cache_time) < self._cache_ttl

    async def list_models(self) -> ModelsResponse:
        logger.info("list_models_request")

        # Return cached result if valid
        if self._is_cache_valid():
            logger.debug("list_models_cache_hit")
            return self._cache

        try:
            models: list[ModelInfo] = []

            # Try to fetch models from centralized providers
            if await provider_client.is_available():
                providers = await provider_client.get_all_providers()
                for provider in providers:
                    try:
                        # Each provider may expose a list of models
                        for m in getattr(provider, "models", None) or []:
                            if isinstance(m, dict):
                                model_id = m.get("id") or m.get("model_id")
                            elif isinstance(m, str):
                                model_id = m
                            else:
                                # Model objects expose the id as an attribute;
                                # str() would yield their repr instead.
                                model_id = getattr(m, "id", None)
                            if model_id:
                                models.append(ModelInfo(
                                    id=model_id,
                                    object="model",
                                    created=int(time.time()),
                                    owned_by=provider.name or "unknown",
                                ))
                    except Exception as exc:
                        logger.warning("provider_models_error", provider=provider.name, error=str(exc))

            # Try OpenAI API directly as additional source
            if settings.openai_api_key:
                try:
                    from openai import AsyncOpenAI
                    client = AsyncOpenAI(api_key=settings.openai_api_key)
                    openai_models = await client.models.list()
                    seen_ids = {existing.id for existing in models}
                    for m in openai_models.data:
                        if m.id not in seen_ids:
                            seen_ids.add(m.id)
                            models.append(ModelInfo(
                                id=m.id,
                                object="model",
                                created=m.created or int(time.time()),
                                owned_by=m.owned_by or "openai",
                            ))
                except Exception as exc:
                    logger.warning("openai_models_fetch_error", error=str(exc))

            # Fallback if no dynamic models found
            if not models:
                logger.info("list_models_fallback")
                models = list(FALLBACK_MODELS)

            result = ModelsResponse(object="list", data=models)

            # Cache the result
            self._cache = result
            self._cache_time = time.time()

            logger.info("list_models_success", count=len(models))
            return result

        except Exception as e:
            logger.error("list_models_error", error=str(e))
            # Return fallback on error
            return ModelsResponse(object="list", data=list(FALLBACK_MODELS))


model_service = ModelService()
`;
}
|
|
651
|
+
// ============================================================================
|
|
652
|
+
// Chat Module: moderation_service.py
|
|
653
|
+
// ============================================================================
|
|
654
|
+
/**
 * Builds the Python source of the chat module's moderation service.
 *
 * The generated module defines a `ModerationService` class and a module-level
 * `moderation_service` instance. The service resolves an OpenAI API key
 * (explicit provider, then the default "openai" provider, then
 * `settings.openai_api_key`), calls the OpenAI moderation endpoint, and maps
 * the results onto the project's `Moderation*` models.
 *
 * The generated code closes the per-request `AsyncOpenAI` client in a
 * `finally` block so its HTTP connection pool is not leaked.
 *
 * @returns {string} Python source code for the moderation service module.
 */
function generateModerationService() {
    return `"""Content moderation service using OpenAI. Auto-generated by ChimerAI CLI."""

import uuid
from typing import Optional
from openai import AsyncOpenAI
import structlog

from models import (
    ModerationRequest,
    ModerationResponse,
    ModerationResult,
    ModerationCategories,
    ModerationScores,
)
from config import settings
from provider_client import provider_client

logger = structlog.get_logger()

# Model used when the request does not name one.
DEFAULT_MODERATION_MODEL = "omni-moderation-latest"

# Fields copied from each OpenAI result into ModerationCategories / ModerationScores.
CATEGORY_FIELDS = (
    "hate",
    "hate_threatening",
    "harassment",
    "harassment_threatening",
    "self_harm",
    "self_harm_intent",
    "self_harm_instructions",
    "sexual",
    "sexual_minors",
    "violence",
    "violence_graphic",
)


class ModerationService:
    """Content moderation using OpenAI moderation API."""

    async def _get_client(self, provider_id: Optional[str] = None) -> AsyncOpenAI:
        """Return an OpenAI client for the best available API key.

        Resolution order: the provider named by provider_id, the default
        "openai" provider, then settings.openai_api_key.

        Raises:
            ValueError: If no API key can be resolved.
        """
        if provider_id:
            provider = await provider_client.get_provider(provider_id)
            if provider and provider.api_key:
                return AsyncOpenAI(api_key=provider.api_key)

        if await provider_client.is_available():
            default = await provider_client.get_default_provider("openai")
            if default and default.api_key:
                return AsyncOpenAI(api_key=default.api_key)

        api_key = settings.openai_api_key
        if not api_key:
            raise ValueError(
                "No OpenAI API key available for moderation. "
                "Configure an OpenAI provider at /providers or set OPENAI_API_KEY."
            )
        return AsyncOpenAI(api_key=api_key)

    async def moderate_content(
        self,
        request: ModerationRequest,
        provider_id: Optional[str] = None,
        user_id: Optional[str] = None,
    ) -> ModerationResponse:
        """Run the request input through the OpenAI moderation endpoint.

        Args:
            request: Moderation request; input may be a string or a list.
            provider_id: Optional provider whose API key should be used.
            user_id: Accepted for interface parity; not used here.

        Returns:
            ModerationResponse with one result per input item.

        Raises:
            ValueError: If no API key is available.
            Exception: Any error raised by the OpenAI client (logged, re-raised).
        """
        logger.info("moderation_request", model=request.model)
        model = request.model or DEFAULT_MODERATION_MODEL

        client: Optional[AsyncOpenAI] = None
        try:
            client = await self._get_client(provider_id=provider_id)

            # The API accepts a list; wrap a single string.
            inputs = [request.input] if isinstance(request.input, str) else request.input

            response = await client.moderations.create(
                input=inputs,
                model=model,
            )

            results = [
                ModerationResult(
                    flagged=result.flagged,
                    categories=ModerationCategories(
                        **{name: getattr(result.categories, name) for name in CATEGORY_FIELDS}
                    ),
                    category_scores=ModerationScores(
                        **{name: getattr(result.category_scores, name) for name in CATEGORY_FIELDS}
                    ),
                )
                for result in response.results
            ]

            return ModerationResponse(
                id=f"modr-{uuid.uuid4().hex[:8]}",
                model=model,
                results=results,
            )

        except ValueError:
            # Configuration errors are surfaced to the caller without extra logging.
            raise
        except Exception as e:
            logger.error("moderation_error", error=str(e))
            raise
        finally:
            # A client is created per request; release its HTTP connections.
            if client is not None:
                await client.close()


moderation_service = ModerationService()
`;
}
|
|
768
|
+
// ============================================================================
|
|
769
|
+
// RAG Module: embedding_service.py
|
|
770
|
+
// ============================================================================
|
|
771
|
+
/**
 * Builds the Python source of the RAG module's embedding service.
 *
 * The generated module defines an `EmbeddingService` class and a module-level
 * `embedding_service` instance. The service resolves provider credentials,
 * calls LiteLLM's `aembedding`, and wraps the result in the project's
 * `Embedding*` models.
 *
 * The generated code tolerates a response without usage data and treats
 * usage reporting as best-effort, so a reporting failure does not discard
 * embeddings that were already computed.
 *
 * @returns {string} Python source code for the embedding service module.
 */
function generateEmbeddingService() {
    return `"""Embedding generation service using LiteLLM. Auto-generated by ChimerAI CLI."""

from typing import Optional
from litellm import aembedding
import structlog

from models import (
    EmbeddingRequest,
    EmbeddingResponse,
    EmbeddingData,
    EmbeddingUsage,
)
from config import settings
from provider_client import provider_client

logger = structlog.get_logger()


class EmbeddingService:
    """Service for generating embeddings using LiteLLM."""

    async def _resolve_provider_kwargs(self, provider_id: Optional[str] = None) -> dict:
        """Return the api_key / api_base kwargs to pass to LiteLLM.

        Uses the provider named by provider_id when it exists, otherwise the
        default provider when it has an API key. Returns an empty dict when
        nothing is configured.
        """
        kwargs: dict = {}

        if provider_id:
            provider = await provider_client.get_provider(provider_id)
            if provider:
                if provider.api_key:
                    kwargs["api_key"] = provider.api_key
                if provider.base_url:
                    kwargs["api_base"] = provider.base_url
                return kwargs

        if await provider_client.is_available():
            default = await provider_client.get_default_provider()
            if default and default.api_key:
                kwargs["api_key"] = default.api_key
                if default.base_url:
                    kwargs["api_base"] = default.base_url

        return kwargs

    async def create_embeddings(
        self,
        request: EmbeddingRequest,
        provider_id: Optional[str] = None,
        user_id: Optional[str] = None,
    ) -> EmbeddingResponse:
        """Generate embeddings for the request input.

        Args:
            request: Embedding request; input may be a string or a list.
            provider_id: Optional provider whose credentials should be used.
            user_id: Optional user; usage is reported only when both
                provider_id and user_id are given.

        Returns:
            EmbeddingResponse with one entry per input item.

        Raises:
            Exception: Any error raised while resolving the provider or
                calling LiteLLM (logged, re-raised).
        """
        logger.info("embedding_request", model=request.model)

        try:
            provider_kwargs = await self._resolve_provider_kwargs(provider_id=provider_id)

            # LiteLLM is always called with a list of inputs.
            inputs = [request.input] if isinstance(request.input, str) else request.input

            response = await aembedding(
                model=request.model,
                input=inputs,
                user=request.user,
                **provider_kwargs,
            )

            embeddings_data = [
                EmbeddingData(
                    object="embedding",
                    embedding=emb.embedding if hasattr(emb, "embedding") else emb["embedding"],
                    index=i,
                )
                for i, emb in enumerate(response.data)
            ]

            # Usage may be absent on the response; fall back to zero counts.
            usage = getattr(response, "usage", None)
            prompt_tokens = getattr(usage, "prompt_tokens", 0) or 0
            total_tokens = getattr(usage, "total_tokens", 0) or 0

            result = EmbeddingResponse(
                object="list",
                data=embeddings_data,
                model=response.model,
                usage=EmbeddingUsage(
                    prompt_tokens=prompt_tokens,
                    total_tokens=total_tokens,
                ),
            )

            if provider_id and user_id:
                # Best-effort: a reporting failure must not discard the embeddings.
                try:
                    await provider_client.report_usage(
                        provider_id=provider_id,
                        user_id=user_id,
                        model=request.model,
                        prompt_tokens=prompt_tokens,
                        completion_tokens=0,
                        endpoint="/api/embeddings",
                    )
                except Exception as report_error:
                    logger.warning("embedding_usage_report_failed", error=str(report_error))

            return result

        except Exception as e:
            logger.error("embedding_error", error=str(e))
            raise


embedding_service = EmbeddingService()
`;
}
|
|
877
|
+
// ============================================================================
|
|
878
|
+
// RAG Module: file extractor and rag_service.py
|
|
879
|
+
// ============================================================================
|
|
880
|
+
/**
 * Builds the Python source of the RAG module's file text extractor.
 *
 * The generated module exposes `extract_text(file_bytes, filename)`, which
 * dispatches on the file extension to PDF (PyMuPDF), DOCX (python-docx) or
 * plain-text extraction, and `get_supported_formats()`.
 *
 * Compared with the previous template, the generated code:
 * - closes the PDF document even when page extraction raises;
 * - decodes text as "utf-8-sig" so a UTF-8 byte-order mark is stripped;
 * - applies the empty-content check to the latin-1 fallback as well;
 * - returns lists (not sets) from `get_supported_formats()`;
 * - chains the original exception when wrapping errors in ValueError.
 *
 * @returns {string} Python source code for the file extractor module.
 */
function generateFileExtractor() {
    return `"""File text extraction for RAG uploads. Auto-generated by ChimerAI CLI.

Supported formats:
- .txt, .md, .csv, .json, .log and the other TEXT_EXTENSIONS — plain text (UTF-8)
- .pdf — via PyMuPDF (pymupdf)
- .docx — via python-docx
"""

import io
import structlog
from pathlib import Path

logger = structlog.get_logger()

# Optional imports — graceful fallback
try:
    import fitz  # pymupdf
    PDF_AVAILABLE = True
except ImportError:
    PDF_AVAILABLE = False
    logger.warning("pymupdf not installed — PDF extraction disabled. Install with: pip install pymupdf")

try:
    from docx import Document as DocxDocument
    DOCX_AVAILABLE = True
except ImportError:
    DOCX_AVAILABLE = False
    logger.warning("python-docx not installed — DOCX extraction disabled. Install with: pip install python-docx")


# Plain text extensions (read as UTF-8)
TEXT_EXTENSIONS = {".txt", ".md", ".csv", ".json", ".log", ".xml", ".yaml", ".yml", ".html", ".htm", ".rst", ".py", ".js", ".ts"}


def extract_text(file_bytes: bytes, filename: str) -> str:
    """Extract text content from a file based on its extension.

    Args:
        file_bytes: Raw file content as bytes.
        filename: Original filename (used to detect format).

    Returns:
        Extracted text content.

    Raises:
        ValueError: If extraction fails or the file has no text content.
    """
    ext = Path(filename).suffix.lower()

    if ext == ".pdf":
        return _extract_pdf(file_bytes, filename)
    if ext == ".docx":
        return _extract_docx(file_bytes, filename)
    if ext not in TEXT_EXTENSIONS and ext:
        # Unknown extension: still try as text, but warn
        logger.warning("unknown_file_extension", filename=filename, extension=ext, fallback="text")
    return _extract_text(file_bytes, filename)


def _extract_pdf(file_bytes: bytes, filename: str) -> str:
    """Extract text from PDF using PyMuPDF."""
    if not PDF_AVAILABLE:
        raise ValueError(
            f"Cannot extract text from PDF '{filename}': pymupdf is not installed. "
            f"Install with: pip install pymupdf"
        )
    try:
        doc = fitz.open(stream=file_bytes, filetype="pdf")
        try:
            pages = []
            for page in doc:
                text = page.get_text()
                if text.strip():
                    pages.append(text)
        finally:
            # Release the document even if a page fails to extract.
            doc.close()

        if not pages:
            raise ValueError(f"PDF '{filename}' contains no extractable text (possibly scanned/image-only).")

        result = "\\n\\n".join(pages)
        logger.info("pdf_extracted", filename=filename, pages=len(pages), chars=len(result))
        return result
    except ValueError:
        raise
    except Exception as e:
        raise ValueError(f"Failed to extract text from PDF '{filename}': {e}") from e


def _extract_docx(file_bytes: bytes, filename: str) -> str:
    """Extract text from DOCX using python-docx."""
    if not DOCX_AVAILABLE:
        raise ValueError(
            f"Cannot extract text from DOCX '{filename}': python-docx is not installed. "
            f"Install with: pip install python-docx"
        )
    try:
        doc = DocxDocument(io.BytesIO(file_bytes))
        paragraphs = [p.text for p in doc.paragraphs if p.text.strip()]

        if not paragraphs:
            raise ValueError(f"DOCX '{filename}' contains no text content.")

        result = "\\n\\n".join(paragraphs)
        logger.info("docx_extracted", filename=filename, paragraphs=len(paragraphs), chars=len(result))
        return result
    except ValueError:
        raise
    except Exception as e:
        raise ValueError(f"Failed to extract text from DOCX '{filename}': {e}") from e


def _extract_text(file_bytes: bytes, filename: str) -> str:
    """Read file as UTF-8 text, falling back to latin-1.

    Raises:
        ValueError: If the decoded text is empty or only whitespace.
    """
    try:
        # utf-8-sig also accepts plain UTF-8 and strips a leading BOM.
        text = file_bytes.decode("utf-8-sig")
    except UnicodeDecodeError:
        # latin-1 maps every byte to a character, so this decode cannot fail.
        text = file_bytes.decode("latin-1")
        logger.warning("text_fallback_latin1", filename=filename)

    if not text.strip():
        raise ValueError(f"File '{filename}' is empty or contains only whitespace.")

    logger.info("text_extracted", filename=filename, chars=len(text))
    return text


def get_supported_formats() -> dict:
    """Return supported file formats and their availability."""
    return {
        "text": sorted(TEXT_EXTENSIONS),
        "pdf": [".pdf"] if PDF_AVAILABLE else None,
        "docx": [".docx"] if DOCX_AVAILABLE else None,
        "pdf_available": PDF_AVAILABLE,
        "docx_available": DOCX_AVAILABLE,
    }
`;
}
|
|
1022
|
+
/**
 * Builds the Python source of the RAG module's `rag_service.py`.
 *
 * The generated module defines a `RAGService` class and a module-level
 * `rag_service` instance. The service adds, searches and deletes documents
 * in the FAISS vector store and answers questions by passing retrieved
 * documents to the chat service as system-prompt context.
 *
 * The generated code treats a non-positive `k` as "retrieve nothing"
 * instead of forwarding it to the vector store search.
 *
 * @returns {string} Python source code for the RAG service module.
 */
function generateRagService() {
    return `"""RAG (Retrieval Augmented Generation) service. Auto-generated by ChimerAI CLI."""

from typing import List, Optional
import structlog

from config import settings
from services.vector_store import vector_store, FAISS_AVAILABLE
from services.chat_service import chat_service
from models import ChatCompletionRequest, ChatMessage, MessageRole

logger = structlog.get_logger()


class RAGService:
    """Service for Retrieval Augmented Generation (RAG)."""

    def _check_availability(self):
        """Raise RuntimeError when the FAISS vector store cannot be used."""
        if not FAISS_AVAILABLE or vector_store is None:
            raise RuntimeError(
                "FAISS vector store is not available. "
                "Install faiss-cpu: pip install faiss-cpu numpy"
            )

    async def add_documents(
        self,
        documents: List[str],
        metadatas: Optional[List[dict]] = None,
    ) -> dict:
        """Add documents to the vector store and report the new totals."""
        try:
            self._check_availability()
            ids = await vector_store.add_texts(documents, metadatas)
            logger.info("documents_added", count=len(ids))
            return {
                "status": "success",
                "added": len(ids),
                "ids": ids,
                "total_vectors": vector_store.get_stats()["total_vectors"],
            }
        except Exception as e:
            logger.error("add_documents_failed", error=str(e))
            raise

    async def search_documents(self, query: str, k: int = 4) -> List[dict]:
        """Return up to k stored documents most similar to query.

        A non-positive k yields an empty list without querying the store.
        """
        try:
            self._check_availability()
            if k <= 0:
                results = []
            else:
                results = await vector_store.similarity_search(query, k=k)
            logger.info("documents_searched", query_length=len(query), results=len(results))
            return results
        except Exception as e:
            logger.error("search_documents_failed", error=str(e))
            raise

    async def rag_chat(
        self,
        query: str,
        model: Optional[str] = None,
        k: int = 3,
        temperature: float = 0.7,
        max_tokens: Optional[int] = None,
        provider_id: Optional[str] = None,
        user_id: Optional[str] = None,
    ) -> dict:
        """Answer query with a chat completion grounded in retrieved documents.

        Args:
            query: User question.
            model: Chat model; defaults to settings.default_chat_model.
            k: Number of documents to retrieve; non-positive retrieves none.
            temperature: Sampling temperature passed to the chat request.
            max_tokens: Optional completion token limit.
            provider_id: Optional provider forwarded to the chat service.
            user_id: Optional user forwarded to the chat service.

        Returns:
            The chat response as a dict with an added "rag_metadata" entry.
        """
        try:
            self._check_availability()

            if k > 0:
                relevant_docs = await vector_store.similarity_search(query, k=k)
            else:
                relevant_docs = []

            context_parts = [
                f"[Document {i}]\\n{doc['text']}"
                for i, doc in enumerate(relevant_docs, 1)
            ]
            context = "\\n\\n".join(context_parts) if context_parts else "No relevant documents found."

            system_message = f"""You are a helpful assistant. Use the following context to answer the user's question.
If the context doesn't contain relevant information, say so and provide a general answer.

Context:
{context}"""

            messages = [
                ChatMessage(role=MessageRole.SYSTEM, content=system_message),
                ChatMessage(role=MessageRole.USER, content=query),
            ]

            model = model or settings.default_chat_model

            chat_request = ChatCompletionRequest(
                model=model,
                messages=messages,
                temperature=temperature,
                max_tokens=max_tokens,
            )
            response = await chat_service.create_completion(
                chat_request,
                provider_id=provider_id,
                user_id=user_id,
            )

            response_dict = response.model_dump()

            response_dict["rag_metadata"] = {
                "retrieved_documents": len(relevant_docs),
                "documents": [
                    {
                        # Previews are capped at 200 characters.
                        "text": (doc["text"][:200] + "...") if len(doc["text"]) > 200 else doc["text"],
                        "score": doc["score"],
                        "metadata": doc.get("metadata", {}),
                    }
                    for doc in relevant_docs
                ],
            }

            logger.info("rag_chat_completed", query_length=len(query), docs_retrieved=len(relevant_docs))
            return response_dict

        except Exception as e:
            logger.error("rag_chat_failed", error=str(e))
            raise

    def get_stats(self) -> dict:
        """Return the vector store statistics."""
        self._check_availability()
        return vector_store.get_stats()

    def clear_store(self):
        """Remove every vector from the store."""
        self._check_availability()
        vector_store.clear()
        return {"status": "success", "message": "Vector store cleared"}

    async def delete_documents(self, document_ids: List[int]) -> dict:
        """Delete the given ids from the store and report what remains."""
        try:
            self._check_availability()
            deleted = await vector_store.delete_by_ids(document_ids)
            logger.info("documents_deleted", requested=len(document_ids), deleted=deleted)
            return {
                "status": "success",
                "deleted": deleted,
                "remaining_vectors": vector_store.get_stats()["total_vectors"],
            }
        except Exception as e:
            logger.error("delete_documents_failed", error=str(e))
            raise


rag_service = RAGService()
`;
}
|
|
1169
|
+
// ============================================================================
|
|
1170
|
+
// RAG Module: vector_store.py
|
|
1171
|
+
// ============================================================================
|
|
1172
|
+
/**
 * Builds the Python source of the RAG module's `vector_store.py`.
 *
 * The generated module defines a FAISS-backed `VectorStore` class (chunking
 * via `RecursiveCharacterTextSplitter`, embeddings via LiteLLM) and a
 * module-level `vector_store` that is `None` when FAISS is unavailable or
 * initialisation fails.
 *
 * Compared with the previous template, the generated code:
 * - builds the replacement index in `delete_by_ids` before touching the live
 *   one, so a failure cannot leave the in-memory store empty, and reuses the
 *   stored vectors instead of re-embedding the remaining texts;
 * - ignores FAISS's -1 "no result" placeholder and a non-positive `k` in
 *   `similarity_search`;
 * - skips `os.makedirs` when the index path has no directory component;
 * - prevents user metadata from overwriting the `_system_*` bookkeeping keys.
 *
 * @returns {string} Python source code for the vector store module.
 */
function generateVectorStore() {
    return `"""FAISS-based vector store with chunking and LiteLLM embeddings. Auto-generated by ChimerAI CLI."""

try:
    import faiss
    import numpy as np
    FAISS_AVAILABLE = True
except Exception as e:
    FAISS_AVAILABLE = False
    print(f"Warning: FAISS/Numpy not available: {e}")

import pickle
import os
from typing import List, Dict, Any, Optional
import litellm
from langchain_text_splitters import RecursiveCharacterTextSplitter
from config import settings
import structlog

logger = structlog.get_logger()


class VectorStore:
    """FAISS-based vector store for document embeddings and semantic search."""

    def __init__(self, dimension: int = 1536, index_path: str = "data/faiss_index"):
        if not FAISS_AVAILABLE:
            raise RuntimeError("FAISS is not available. Please install faiss-cpu.")

        self.dimension = dimension
        self.index_path = index_path
        self.metadata_path = f"{index_path}.metadata.pkl"

        # Text splitter for chunking
        self.text_splitter = RecursiveCharacterTextSplitter(
            chunk_size=1000,
            chunk_overlap=200,
            length_function=len,
            separators=["\\n\\n", "\\n", ". ", " ", ""],
        )

        self.index = None
        self.metadata: List[Dict[str, Any]] = []
        self._load_or_create_index()

    def _load_or_create_index(self):
        """Load the persisted index and metadata, or start with an empty index."""
        if os.path.exists(self.index_path) and os.path.exists(self.metadata_path):
            try:
                self.index = faiss.read_index(self.index_path)
                # NOTE(security): pickle.load executes arbitrary code from the
                # file; only load metadata files written by this application.
                with open(self.metadata_path, "rb") as f:
                    self.metadata = pickle.load(f)
                logger.info("faiss_index_loaded", num_vectors=self.index.ntotal)
            except Exception as e:
                logger.error("faiss_index_load_failed", error=str(e))
                self._create_new_index()
        else:
            self._create_new_index()

    def _create_new_index(self):
        """Replace the in-memory index and metadata with empty ones."""
        self.index = faiss.IndexFlatL2(self.dimension)
        self.metadata = []
        logger.info("faiss_index_created", dimension=self.dimension)

    def save(self):
        """Persist the index and its metadata to disk."""
        try:
            index_dir = os.path.dirname(self.index_path)
            # os.makedirs("") raises, so skip it for a bare file name.
            if index_dir:
                os.makedirs(index_dir, exist_ok=True)
            faiss.write_index(self.index, self.index_path)
            with open(self.metadata_path, "wb") as f:
                pickle.dump(self.metadata, f)
            logger.info("faiss_index_saved", num_vectors=self.index.ntotal)
        except Exception as e:
            logger.error("faiss_index_save_failed", error=str(e))
            raise

    async def add_texts(
        self, texts: List[str], metadatas: Optional[List[Dict[str, Any]]] = None
    ) -> List[int]:
        """Chunk, embed and store texts; return the ids assigned to the chunks."""
        if not texts:
            return []

        if metadatas is None:
            metadatas = [{} for _ in texts]

        # Split texts into chunks
        all_chunks: List[str] = []
        all_metadatas: List[Dict[str, Any]] = []

        for i, text in enumerate(texts):
            chunks = self.text_splitter.split_text(text)
            if not chunks:
                chunks = [text]

            meta = metadatas[i] if i < len(metadatas) else {}
            for j, chunk in enumerate(chunks):
                all_chunks.append(chunk)
                all_metadatas.append({
                    **meta,
                    "_chunk_index": j,
                    "_chunk_total": len(chunks),
                    "_source_doc_index": i,
                })

        embeddings = await self._generate_embeddings(all_chunks)
        embeddings_array = np.array(embeddings, dtype=np.float32)

        start_id = len(self.metadata)
        self.index.add(embeddings_array)

        for offset, (chunk, metadata) in enumerate(zip(all_chunks, all_metadatas)):
            # System keys go last so user metadata cannot overwrite them.
            self.metadata.append({**metadata, "_system_id": start_id + offset, "_system_text": chunk})

        self.save()
        ids = list(range(start_id, start_id + len(all_chunks)))
        logger.info("texts_added_to_index",
                    documents=len(texts),
                    chunks=len(all_chunks),
                    total=self.index.ntotal)
        return ids

    async def delete_by_ids(self, ids: List[int]) -> int:
        """Delete the chunks with the given ids; return how many were removed.

        Remaining chunks are renumbered from zero. The replacement index is
        built completely before the live index is swapped, so a failure
        leaves the current state untouched.
        """
        ids_set = set(ids)
        keep_positions = [
            pos for pos, m in enumerate(self.metadata) if m["_system_id"] not in ids_set
        ]

        deleted_count = len(self.metadata) - len(keep_positions)
        if deleted_count == 0:
            return 0

        remaining = [self.metadata[pos] for pos in keep_positions]

        new_index = faiss.IndexFlatL2(self.dimension)
        if remaining:
            aligned = (
                self.index.ntotal == len(self.metadata)
                and self.index.d == self.dimension
            )
            if aligned:
                # Reuse the stored vectors instead of paying for new embeddings.
                vectors = self.index.reconstruct_n(0, self.index.ntotal)[keep_positions]
            else:
                # Index and metadata disagree; rebuild from the stored texts.
                texts = [m["_system_text"] for m in remaining]
                vectors = await self._generate_embeddings(texts)
            new_index.add(np.ascontiguousarray(vectors, dtype=np.float32))

        new_metadata = []
        for new_id, m in enumerate(remaining):
            new_meta = {key: val for key, val in m.items() if key != "_system_id"}
            new_meta["_system_id"] = new_id
            new_metadata.append(new_meta)

        self.index = new_index
        self.metadata = new_metadata

        self.save()
        logger.info("documents_deleted", deleted=deleted_count, remaining=self.index.ntotal)
        return deleted_count

    async def similarity_search(self, query: str, k: int = 4) -> List[Dict[str, Any]]:
        """Return up to k stored chunks closest to query, best match first."""
        if k <= 0 or self.index.ntotal == 0:
            return []

        query_embeddings = await self._generate_embeddings([query])
        query_vector = np.array(query_embeddings, dtype=np.float32)

        k = min(k, self.index.ntotal)
        distances, indices = self.index.search(query_vector, k)

        results = []
        for rank, (distance, idx) in enumerate(zip(distances[0], indices[0]), 1):
            idx = int(idx)
            # FAISS pads missing results with -1, which must not index metadata.
            if not 0 <= idx < len(self.metadata):
                continue
            stored = self.metadata[idx]
            user_metadata = {
                key: value for key, value in stored.items() if not key.startswith("_system_")
            }
            results.append({
                "id": stored.get("_system_id", idx),
                "text": stored.get("_system_text", ""),
                # Map an L2 distance in [0, inf) to a score in (0, 1].
                "score": 1.0 / (1.0 + float(distance)),
                "rank": rank,
                "metadata": user_metadata,
            })

        return results

    async def _generate_embeddings(self, texts: List[str]) -> List[List[float]]:
        """Embed texts with the configured default embedding model."""
        try:
            response = await litellm.aembedding(
                model=settings.default_embedding_model,
                input=texts,
            )
            embeddings = [
                item["embedding"] if isinstance(item, dict) else item.embedding
                for item in response.data
            ]
            logger.info("embeddings_generated",
                        count=len(embeddings),
                        model=settings.default_embedding_model)
            return embeddings
        except Exception as e:
            logger.error("embedding_generation_failed", error=str(e))
            raise

    def get_stats(self) -> Dict[str, Any]:
        """Return size and type information about the index."""
        return {
            "total_vectors": self.index.ntotal,
            "dimension": self.dimension,
            "index_type": type(self.index).__name__,
            "metadata_count": len(self.metadata),
        }

    def clear(self):
        """Drop every vector and persist the empty index."""
        self._create_new_index()
        self.save()
        logger.info("faiss_index_cleared")


if FAISS_AVAILABLE:
    try:
        vector_store = VectorStore(dimension=settings.embedding_dimension)
    except Exception as e:
        logger.warning(f"Failed to initialize vector store: {e}")
        vector_store = None
else:
    vector_store = None
`;
}
|
|
1388
|
+
// ============================================================================
|
|
1389
|
+
// Guardrails Module: guardrails_service.py
|
|
1390
|
+
// NOTE: Backslashes in regex patterns are double-escaped for JS template literals.
|
|
1391
|
+
// ============================================================================
|
|
1392
|
+
function generateGuardrailsService() {
|
|
1393
|
+
return `"""Guardrails and safety tools. Auto-generated by ChimerAI CLI."""
|
|
1394
|
+
|
|
1395
|
+
from typing import Dict, Any, List, Optional
|
|
1396
|
+
import re
|
|
1397
|
+
import structlog
|
|
1398
|
+
|
|
1399
|
+
logger = structlog.get_logger()
|
|
1400
|
+
|
|
1401
|
+
|
|
1402
|
+
class GuardrailsService:
|
|
1403
|
+
"""
|
|
1404
|
+
Input/Output validation and safety checks.
|
|
1405
|
+
|
|
1406
|
+
Features:
|
|
1407
|
+
- PII detection and redaction
|
|
1408
|
+
- Toxicity filtering
|
|
1409
|
+
- Prompt injection detection
|
|
1410
|
+
- Output validation
|
|
1411
|
+
"""
|
|
1412
|
+
|
|
1413
|
+
def __init__(self):
|
|
1414
|
+
self.pii_patterns = self._initialize_pii_patterns()
|
|
1415
|
+
self.toxic_keywords = self._load_toxic_keywords()
|
|
1416
|
+
|
|
1417
|
+
def _initialize_pii_patterns(self) -> Dict[str, re.Pattern]:
|
|
1418
|
+
return {
|
|
1419
|
+
"email": re.compile(r'\\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\\.[A-Z|a-z]{2,}\\b'),
|
|
1420
|
+
"phone": re.compile(r'\\b\\d{3}[-.]?\\d{3}[-.]?\\d{4}\\b'),
|
|
1421
|
+
"ssn": re.compile(r'\\b\\d{3}-\\d{2}-\\d{4}\\b'),
|
|
1422
|
+
"credit_card": re.compile(r'\\b\\d{4}[- ]?\\d{4}[- ]?\\d{4}[- ]?\\d{4}\\b'),
|
|
1423
|
+
"ip_address": re.compile(r'\\b\\d{1,3}\\.\\d{1,3}\\.\\d{1,3}\\.\\d{1,3}\\b'),
|
|
1424
|
+
"api_key": re.compile(r'\\b[A-Za-z0-9_-]{32,}\\b'),
|
|
1425
|
+
}
|
|
1426
|
+
|
|
1427
|
+
def _load_toxic_keywords(self) -> List[str]:
|
|
1428
|
+
return [
|
|
1429
|
+
"kill", "murder", "violent", "attack", "threat", "assault", "abuse",
|
|
1430
|
+
"torture", "harm", "hurt", "shoot", "stab", "destroy",
|
|
1431
|
+
"hate", "hatred", "racist", "supremacy", "genocide",
|
|
1432
|
+
"rape", "molest", "pedophile", "incest",
|
|
1433
|
+
"suicide", "self harm",
|
|
1434
|
+
"terrorist", "extremist", "radical", "bomb",
|
|
1435
|
+
]
|
|
1436
|
+
|
|
1437
|
+
async def detect_pii(self, text: str) -> Dict[str, Any]:
|
|
1438
|
+
try:
|
|
1439
|
+
detected = {}
|
|
1440
|
+
for pii_type, pattern in self.pii_patterns.items():
|
|
1441
|
+
matches = pattern.findall(text)
|
|
1442
|
+
if matches:
|
|
1443
|
+
detected[pii_type] = {"count": len(matches), "samples": matches[:3]}
|
|
1444
|
+
|
|
1445
|
+
has_pii = len(detected) > 0
|
|
1446
|
+
logger.info("pii_detection", has_pii=has_pii, types=list(detected.keys()))
|
|
1447
|
+
return {"has_pii": has_pii, "detected": detected, "risk_level": "high" if has_pii else "low"}
|
|
1448
|
+
|
|
1449
|
+
except Exception as e:
|
|
1450
|
+
logger.error("pii_detection_error", error=str(e))
|
|
1451
|
+
raise
|
|
1452
|
+
|
|
1453
|
+
async def redact_pii(self, text: str, redaction_char: str = "\\u2588") -> Dict[str, Any]:
|
|
1454
|
+
try:
|
|
1455
|
+
redacted_text = text
|
|
1456
|
+
changes = []
|
|
1457
|
+
for pii_type, pattern in self.pii_patterns.items():
|
|
1458
|
+
for match in pattern.finditer(text):
|
|
1459
|
+
original = match.group()
|
|
1460
|
+
replacement = redaction_char * len(original)
|
|
1461
|
+
redacted_text = redacted_text.replace(original, replacement)
|
|
1462
|
+
changes.append({"type": pii_type, "original": original, "position": match.span()})
|
|
1463
|
+
|
|
1464
|
+
logger.info("pii_redaction", changes_count=len(changes))
|
|
1465
|
+
return {"redacted_text": redacted_text, "changes": changes, "redactions_count": len(changes)}
|
|
1466
|
+
|
|
1467
|
+
except Exception as e:
|
|
1468
|
+
logger.error("pii_redaction_error", error=str(e))
|
|
1469
|
+
raise
|
|
1470
|
+
|
|
1471
|
+
async def check_toxicity(self, text: str) -> Dict[str, Any]:
|
|
1472
|
+
try:
|
|
1473
|
+
text_lower = text.lower()
|
|
1474
|
+
toxic_matches = [kw for kw in self.toxic_keywords if kw in text_lower]
|
|
1475
|
+
toxicity_score = min(len(toxic_matches) / 10.0, 1.0)
|
|
1476
|
+
is_toxic = toxicity_score > 0.3
|
|
1477
|
+
|
|
1478
|
+
logger.info("toxicity_check", is_toxic=is_toxic, score=toxicity_score)
|
|
1479
|
+
return {
|
|
1480
|
+
"is_toxic": is_toxic,
|
|
1481
|
+
"toxicity_score": toxicity_score,
|
|
1482
|
+
"matched_keywords": toxic_matches[:5],
|
|
1483
|
+
"risk_level": "high" if toxicity_score > 0.7 else "medium" if toxicity_score > 0.3 else "low",
|
|
1484
|
+
}
|
|
1485
|
+
|
|
1486
|
+
except Exception as e:
|
|
1487
|
+
logger.error("toxicity_check_error", error=str(e))
|
|
1488
|
+
raise
|
|
1489
|
+
|
|
1490
|
+
async def detect_prompt_injection(self, prompt: str) -> Dict[str, Any]:
|
|
1491
|
+
try:
|
|
1492
|
+
injection_patterns = [
|
|
1493
|
+
r'ignore (previous|above|all) (instructions|prompts)',
|
|
1494
|
+
r'you are (now|a) (?!assistant)',
|
|
1495
|
+
r'(system|admin) (prompt|mode)',
|
|
1496
|
+
r'<\\\\|im_start\\\\|>',
|
|
1497
|
+
r'###\\\\s*system',
|
|
1498
|
+
r'disregard',
|
|
1499
|
+
r'forget (everything|all)',
|
|
1500
|
+
]
|
|
1501
|
+
|
|
1502
|
+
detected = []
|
|
1503
|
+
prompt_lower = prompt.lower()
|
|
1504
|
+
for pattern in injection_patterns:
|
|
1505
|
+
if re.search(pattern, prompt_lower):
|
|
1506
|
+
detected.append(pattern)
|
|
1507
|
+
|
|
1508
|
+
is_injection = len(detected) > 0
|
|
1509
|
+
logger.warning("prompt_injection_check", detected=is_injection)
|
|
1510
|
+
return {
|
|
1511
|
+
"is_injection": is_injection,
|
|
1512
|
+
"confidence": len(detected) / len(injection_patterns),
|
|
1513
|
+
"matched_patterns": detected,
|
|
1514
|
+
"risk_level": "high" if is_injection else "low",
|
|
1515
|
+
}
|
|
1516
|
+
|
|
1517
|
+
except Exception as e:
|
|
1518
|
+
logger.error("injection_detection_error", error=str(e))
|
|
1519
|
+
raise
|
|
1520
|
+
|
|
1521
|
+
async def validate_output(
|
|
1522
|
+
self,
|
|
1523
|
+
output: str,
|
|
1524
|
+
max_length: Optional[int] = None,
|
|
1525
|
+
required_elements: Optional[List[str]] = None,
|
|
1526
|
+
) -> Dict[str, Any]:
|
|
1527
|
+
try:
|
|
1528
|
+
issues = []
|
|
1529
|
+
|
|
1530
|
+
if max_length and len(output) > max_length:
|
|
1531
|
+
issues.append(f"Output exceeds max length ({len(output)} > {max_length})")
|
|
1532
|
+
|
|
1533
|
+
if required_elements:
|
|
1534
|
+
output_lower = output.lower()
|
|
1535
|
+
missing = [el for el in required_elements if el.lower() not in output_lower]
|
|
1536
|
+
if missing:
|
|
1537
|
+
issues.append(f"Missing required elements: {missing}")
|
|
1538
|
+
|
|
1539
|
+
pii_result = await self.detect_pii(output)
|
|
1540
|
+
if pii_result["has_pii"]:
|
|
1541
|
+
issues.append(f"Output contains PII: {list(pii_result['detected'].keys())}")
|
|
1542
|
+
|
|
1543
|
+
toxicity_result = await self.check_toxicity(output)
|
|
1544
|
+
if toxicity_result["is_toxic"]:
|
|
1545
|
+
issues.append(f"Output contains toxic content (score: {toxicity_result['toxicity_score']:.2f})")
|
|
1546
|
+
|
|
1547
|
+
is_valid = len(issues) == 0
|
|
1548
|
+
logger.info("output_validation", is_valid=is_valid, issues_count=len(issues))
|
|
1549
|
+
return {
|
|
1550
|
+
"is_valid": is_valid,
|
|
1551
|
+
"issues": issues,
|
|
1552
|
+
"pii_detected": pii_result["has_pii"],
|
|
1553
|
+
"toxicity_score": toxicity_result["toxicity_score"],
|
|
1554
|
+
}
|
|
1555
|
+
|
|
1556
|
+
except Exception as e:
|
|
1557
|
+
logger.error("output_validation_error", error=str(e))
|
|
1558
|
+
raise
|
|
1559
|
+
|
|
1560
|
+
async def sanitize_input(self, text: str) -> str:
|
|
1561
|
+
try:
|
|
1562
|
+
sanitized = re.sub(r'[\\x00-\\x1f\\x7f-\\x9f]', '', text)
|
|
1563
|
+
sanitized = re.sub(r'\\s+', ' ', sanitized)
|
|
1564
|
+
max_length = 10_000
|
|
1565
|
+
if len(sanitized) > max_length:
|
|
1566
|
+
sanitized = sanitized[:max_length]
|
|
1567
|
+
logger.warning("input_truncated", original_length=len(text))
|
|
1568
|
+
return sanitized.strip()
|
|
1569
|
+
|
|
1570
|
+
except Exception as e:
|
|
1571
|
+
logger.error("input_sanitization_error", error=str(e))
|
|
1572
|
+
raise
|
|
1573
|
+
|
|
1574
|
+
|
|
1575
|
+
guardrails_service = GuardrailsService()
|
|
1576
|
+
`;
|
|
1577
|
+
}
|
|
1578
|
+
// ============================================================================
|
|
1579
|
+
// Dynamic Generators — main.py, config.py, models.py, requirements.txt, etc.
|
|
1580
|
+
// ============================================================================
|
|
1581
|
+
/**
 * Generate main.py — the FastAPI entry point of the AI service.
 *
 * Route imports and `include_router` calls are emitted only for the enabled
 * modules (`chat`, `rag`, `guardrails`) and, when at least one tool is
 * installed, for the tools router. Everything else in the file is static.
 *
 * The generated bearer-token middleware compares tokens with
 * `hmac.compare_digest` (constant time) and lets CORS preflight (`OPTIONS`)
 * requests through, because the middleware is registered after — and
 * therefore runs outside of — `CORSMiddleware`.
 *
 * @param {string[]} modules - Installed module names.
 * @param {string[]} [tools=[]] - Installed tool names; only emptiness matters here.
 * @returns {string} Python source for main.py.
 */
function generateAiServiceMain(modules, tools = []) {
    // Order matters: it is the order of imports and router registration in main.py.
    const routeTable = [
        { name: 'chat', enabled: modules.includes('chat') },
        { name: 'rag', enabled: modules.includes('rag') },
        { name: 'guardrails', enabled: modules.includes('guardrails') },
        { name: 'tools', enabled: tools.length > 0 },
    ];
    const activeRoutes = routeTable.filter((route) => route.enabled).map((route) => route.name);
    const imports = activeRoutes.map((name) => `from routes.${name}_routes import router as ${name}_router`);
    const routers = activeRoutes.map((name) => `app.include_router(${name}_router)`);
    return `"""ChimerAI AI Service — Auto-generated by ChimerAI CLI."""

import hmac
import os
from contextlib import asynccontextmanager
from fastapi import FastAPI, Request
from fastapi.middleware.cors import CORSMiddleware
from fastapi.responses import JSONResponse
import structlog
import uvicorn

from config import settings
from provider_client import provider_client

${imports.join('\n')}

structlog.configure(
    processors=[
        structlog.processors.TimeStamper(fmt="iso"),
        structlog.processors.JSONRenderer(),
    ]
)

logger = structlog.get_logger()


@asynccontextmanager
async def lifespan(app: FastAPI):
    """Startup and shutdown logic."""
    logger.info("ai_service_starting", port=settings.port)

    # Set OpenAI key as env fallback for LiteLLM
    if settings.openai_api_key:
        os.environ["OPENAI_API_KEY"] = settings.openai_api_key

    # Try to connect to Frontend Internal API for centralized providers
    provider_mode = "env_fallback"
    if settings.internal_api_token:
        try:
            available = await provider_client.is_available()
            if available:
                providers = await provider_client.get_all_providers()
                provider_mode = "centralized"
                logger.info("provider_client_connected", providers_loaded=len(providers))

                default = await provider_client.get_default_provider()
                if default and default.api_key:
                    os.environ["OPENAI_API_KEY"] = default.api_key
                    logger.info("default_provider_set", provider_name=default.name)
            else:
                logger.warning("provider_client_unavailable")
        except Exception as exc:
            logger.warning("provider_client_error", error=str(exc))

    logger.info("ai_service_started", provider_mode=provider_mode)
    yield

    await provider_client.close()
    logger.info("ai_service_stopped")


app = FastAPI(
    title="ChimerAI AI Service",
    version="1.0.0",
    lifespan=lifespan,
)

# CORS
app.add_middleware(
    CORSMiddleware,
    allow_origins=[
        settings.frontend_url,
        "http://localhost:3000",
        "http://localhost:3001",
    ],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)


# Bearer token auth middleware
@app.middleware("http")
async def verify_service_token(request: Request, call_next):
    """Verify INTERNAL_SERVICE_TOKEN on all non-health endpoints."""
    # Skip auth for health check and docs
    if request.url.path in ("/health", "/docs", "/openapi.json"):
        return await call_next(request)

    # Let CORS preflight requests through: browsers never attach the
    # Authorization header to them, and CORSMiddleware answers them itself.
    if request.method == "OPTIONS":
        return await call_next(request)

    expected_token = settings.internal_service_token
    if not expected_token:
        # No token configured — skip auth (development mode)
        return await call_next(request)

    auth_header = request.headers.get("authorization", "")
    if not auth_header.startswith("Bearer "):
        return JSONResponse(
            status_code=401,
            content={"detail": "Missing or invalid Authorization header"},
        )

    token = auth_header[7:]
    # Constant-time comparison so the token cannot be guessed from response timing
    if not hmac.compare_digest(token.encode("utf-8"), expected_token.encode("utf-8")):
        return JSONResponse(
            status_code=401,
            content={"detail": "Invalid service token"},
        )

    return await call_next(request)


# Register routers
${routers.join('\n')}


# Internal endpoints
@app.post("/api/internal/invalidate-cache")
async def invalidate_cache():
    """Cache invalidation webhook from Frontend."""
    provider_client.invalidate_cache()
    return {"status": "ok"}


@app.get("/health")
async def health():
    """Health check endpoint."""
    provider_available = await provider_client.is_available()
    return {
        "status": "healthy",
        "service": "chimerai-ai-service",
        "provider_mode": "centralized" if provider_available else "env_fallback",
    }


if __name__ == "__main__":
    uvicorn.run(app, host=settings.host, port=settings.port, log_level="info")
`;
}
|
|
1744
|
+
/**
 * Generate config.py — the pydantic-settings `Settings` class of the AI service.
 *
 * The RAG embedding settings are emitted only when the `rag` module is
 * installed. The settings class is configured through
 * `model_config = SettingsConfigDict(...)`, the pydantic v2 replacement for
 * the deprecated inner `class Config`.
 *
 * @param {string[]} modules - Installed module names.
 * @returns {string} Python source for config.py.
 */
function generateAiServiceConfig(modules) {
    // Starts and ends with a newline so the block sits between blank lines.
    const ragFields = modules.includes('rag')
        ? `
    # RAG Settings
    default_embedding_model: str = "text-embedding-ada-002"
    embedding_dimension: int = 1536
`
        : '';
    return `"""ChimerAI AI Service Configuration. Auto-generated by ChimerAI CLI."""

from pydantic_settings import BaseSettings, SettingsConfigDict
from typing import Optional


class Settings(BaseSettings):
    model_config = SettingsConfigDict(env_file=".env", extra="ignore")

    # Service
    host: str = "0.0.0.0"
    port: int = 8002

    # Provider Management (Frontend Internal API)
    frontend_url: str = "http://localhost:3000"
    internal_api_token: Optional[str] = None
    internal_service_token: Optional[str] = None

    # API Keys (Fallback)
    openai_api_key: Optional[str] = None

    # Caching
    redis_url: str = "redis://localhost:6379"
    redis_enabled: bool = False
${ragFields}
    # Default Model
    default_chat_model: str = "gpt-3.5-turbo"


settings = Settings()
`;
}
|
|
1790
|
+
/**
 * Generate models.py — the Pydantic models required by the installed modules.
 *
 * Sections are emitted in a fixed order after the import preamble:
 *   1. chat models       — when `chat` is installed
 *   2. embedding models  — when `chat` or `rag` is installed
 *   3. RAG models        — when `rag` is installed
 *
 * @param {string[]} modules - Installed module names.
 * @returns {string} Python source for models.py.
 */
function generateAiServiceModels(modules) {
    const wantsChat = modules.includes('chat');
    const wantsRag = modules.includes('rag');
    const preamble = `"""Pydantic models for the AI Service. Auto-generated by ChimerAI CLI."""

from pydantic import BaseModel, ConfigDict, Field
from typing import List, Optional, Union
from enum import Enum
`;
    const chatSection = `
# ============================================================================
# Chat Models
# ============================================================================

class MessageRole(str, Enum):
    SYSTEM = "system"
    USER = "user"
    ASSISTANT = "assistant"


class ChatMessage(BaseModel):
    role: MessageRole
    content: str


class ChatCompletionRequest(BaseModel):
    model: str
    messages: List[ChatMessage]
    temperature: Optional[float] = 0.7
    max_tokens: Optional[int] = None
    top_p: Optional[float] = 1.0
    frequency_penalty: Optional[float] = 0.0
    presence_penalty: Optional[float] = 0.0
    stream: Optional[bool] = False
    user: Optional[str] = None
    provider_id: Optional[str] = None
    user_id: Optional[str] = None


class ChatCompletionChoice(BaseModel):
    index: int
    message: ChatMessage
    finish_reason: str


class Usage(BaseModel):
    prompt_tokens: int
    completion_tokens: int
    total_tokens: int


class ChatCompletionResponse(BaseModel):
    id: str
    object: str = "chat.completion"
    created: int
    model: str
    choices: List[ChatCompletionChoice]
    usage: Usage


class DeltaMessage(BaseModel):
    role: Optional[MessageRole] = None
    content: Optional[str] = None


class ChatCompletionChunkChoice(BaseModel):
    index: int
    delta: DeltaMessage
    finish_reason: Optional[str] = None


class ChatCompletionChunk(BaseModel):
    id: str
    object: str = "chat.completion.chunk"
    created: int
    model: str
    choices: List[ChatCompletionChunkChoice]


class ModelInfo(BaseModel):
    id: str
    object: str = "model"
    created: int
    owned_by: str


class ModelsResponse(BaseModel):
    object: str = "list"
    data: List[ModelInfo]


class ModerationRequest(BaseModel):
    input: Union[str, List[str]]
    model: Optional[str] = "omni-moderation-latest"
    provider_id: Optional[str] = None
    user_id: Optional[str] = None


class ModerationCategories(BaseModel):
    model_config = ConfigDict(populate_by_name=True)

    hate: bool
    hate_threatening: bool = Field(alias="hate/threatening")
    harassment: bool
    harassment_threatening: bool = Field(alias="harassment/threatening")
    self_harm: bool = Field(alias="self-harm")
    self_harm_intent: bool = Field(alias="self-harm/intent")
    self_harm_instructions: bool = Field(alias="self-harm/instructions")
    sexual: bool
    sexual_minors: bool = Field(alias="sexual/minors")
    violence: bool
    violence_graphic: bool = Field(alias="violence/graphic")


class ModerationScores(BaseModel):
    model_config = ConfigDict(populate_by_name=True)

    hate: float
    hate_threatening: float = Field(alias="hate/threatening")
    harassment: float
    harassment_threatening: float = Field(alias="harassment/threatening")
    self_harm: float = Field(alias="self-harm")
    self_harm_intent: float = Field(alias="self-harm/intent")
    self_harm_instructions: float = Field(alias="self-harm/instructions")
    sexual: float
    sexual_minors: float = Field(alias="sexual/minors")
    violence: float
    violence_graphic: float = Field(alias="violence/graphic")


class ModerationResult(BaseModel):
    flagged: bool
    categories: ModerationCategories
    category_scores: ModerationScores


class ModerationResponse(BaseModel):
    id: str
    model: str
    results: List[ModerationResult]
`;
    const embeddingSection = `
# ============================================================================
# Embedding Models
# ============================================================================

class EmbeddingRequest(BaseModel):
    model: str
    input: Union[str, List[str]]
    user: Optional[str] = None
    provider_id: Optional[str] = None
    user_id: Optional[str] = None


class EmbeddingData(BaseModel):
    object: str = "embedding"
    embedding: List[float]
    index: int


class EmbeddingUsage(BaseModel):
    prompt_tokens: int
    total_tokens: int


class EmbeddingResponse(BaseModel):
    object: str = "list"
    data: List[EmbeddingData]
    model: str
    usage: EmbeddingUsage
`;
    const ragSection = `
# ============================================================================
# RAG Models
# ============================================================================

class AddDocumentsRequest(BaseModel):
    documents: List[str]
    metadatas: Optional[List[dict]] = None


class SearchRequest(BaseModel):
    query: str
    k: int = Field(default=4, ge=1, le=20)


class SearchResult(BaseModel):
    id: int
    text: str
    score: float
    rank: int
    metadata: dict = {}


class SearchResponse(BaseModel):
    results: List[SearchResult]
    query: str


class RAGChatRequest(BaseModel):
    query: str
    model: Optional[str] = None
    k: int = Field(default=3, ge=1, le=10)
    temperature: Optional[float] = 0.7
    max_tokens: Optional[int] = None
    provider_id: Optional[str] = None
    user_id: Optional[str] = None


class VectorStoreStats(BaseModel):
    total_vectors: int
    dimension: int
    index_type: str
    metadata_count: int


class DeleteDocumentsRequest(BaseModel):
    document_ids: List[int]


class DeleteDocumentsResponse(BaseModel):
    status: str
    deleted: int
    remaining_vectors: int
`;
    // Assemble the file: preamble first, then each section that applies, in order.
    const parts = [preamble];
    if (wantsChat) {
        parts.push(chatSection);
    }
    if (wantsRag || wantsChat) {
        parts.push(embeddingSection);
    }
    if (wantsRag) {
        parts.push(ragSection);
    }
    return parts.join('');
}
|
|
2026
|
+
/**
 * Generate requirements.txt — core dependencies plus those of the installed
 * modules and tools.
 *
 * Requirement specs are collected in a Set (so exact duplicates collapse),
 * then sorted case-insensitively by package name, ignoring any `[extras]`
 * and `>=` version suffix.
 *
 * @param {string[]} modules - Installed module names.
 * @param {string[]} [tools=[]] - Installed tool names, looked up in `exports.TOOL_INFO`.
 * @returns {string} Contents of requirements.txt.
 */
function generateAiServiceRequirements(modules, tools = []) {
    // Always-required packages.
    const requirements = new Set([
        'fastapi>=0.104.1',
        'uvicorn[standard]>=0.24.0',
        'pydantic>=2.10.0',
        'pydantic-settings>=2.7.0',
        'httpx>=0.25.2',
        'litellm>=1.17.0',
        'structlog>=23.2.0',
        'python-dotenv>=1.0.0',
        'python-multipart>=0.0.6',
    ]);
    // Packages contributed by each optional module (guardrails needs none: stdlib regex only).
    const moduleRequirements = {
        chat: ['openai>=1.6.1'],
        rag: [
            'faiss-cpu>=1.9.0',
            'numpy>=1.26.2',
            'openai>=1.6.1',
            'langchain-text-splitters>=0.3.0',
            'pymupdf>=1.24.0',
            'python-docx>=1.1.0',
        ],
    };
    for (const moduleName of ['chat', 'rag']) {
        if (!modules.includes(moduleName)) {
            continue;
        }
        for (const spec of moduleRequirements[moduleName]) {
            requirements.add(spec);
        }
    }
    // Packages contributed by installed tools; unknown tool names are ignored.
    for (const toolName of tools) {
        const toolInfo = exports.TOOL_INFO[toolName];
        if (!toolInfo) {
            continue;
        }
        for (const spec of toolInfo.dependencies) {
            requirements.add(spec);
        }
    }
    // Sort key: bare lower-cased package name.
    const packageName = (spec) => spec.split('>=')[0].split('[')[0].toLowerCase();
    const ordered = [...requirements].sort((left, right) => packageName(left).localeCompare(packageName(right)));
    return [
        '# ChimerAI AI Service Dependencies — Auto-generated by ChimerAI CLI',
        `# Modules: ${modules.join(', ') || 'core'}`,
        `# Tools: ${tools.join(', ') || 'none'}`,
        '',
        ordered.join('\n'),
        '',
    ].join('\n');
}
|
|
2078
|
+
/**
 * Generate the Dockerfile for the AI service.
 *
 * The image is based on python:3.11-slim, installs the build toolchain and
 * curl (used by the health check), installs requirements.txt, copies the
 * service sources, and serves `main:app` with uvicorn on port 8002.
 *
 * @returns {string} Contents of the Dockerfile.
 */
function generateAiServiceDockerfile() {
    const dockerfileLines = [
        'FROM python:3.11-slim',
        '',
        'WORKDIR /app',
        '',
        'RUN apt-get update && apt-get install -y \\',
        '    build-essential \\',
        '    curl \\',
        '    && rm -rf /var/lib/apt/lists/*',
        '',
        'COPY requirements.txt .',
        'RUN pip install --no-cache-dir -r requirements.txt',
        '',
        'COPY . .',
        '',
        'RUN mkdir -p /app/models /app/data',
        '',
        'EXPOSE 8002',
        '',
        'HEALTHCHECK --interval=30s --timeout=10s --start-period=40s --retries=3 \\',
        '    CMD curl -f http://localhost:8002/health || exit 1',
        '',
        'CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "8002"]',
        '', // trailing newline
    ];
    return dockerfileLines.join('\n');
}
|
|
2106
|
+
/**
 * Generate README.md for the AI service.
 *
 * Lists the installed modules and tools, the three ways to start the service,
 * the HTTP endpoints exposed by the installed modules, and the supported
 * environment variables.
 *
 * @param {string[]} modules - Installed module names.
 * @param {string[]} [tools=[]] - Installed tool names; display names come from `exports.TOOL_INFO`.
 * @returns {string} Markdown contents of README.md.
 */
function generateAiServiceReadme(modules, tools = []) {
    const moduleList = modules.length > 0
        ? modules.map((m) => `- **${m}**`).join('\n')
        : '- No modules installed';
    const toolList = tools.length > 0
        ? tools.map((t) => `- ${exports.TOOL_INFO[t]?.displayName || t}`).join('\n')
        : '- No tools installed';
    // Build the endpoint list from present modules only, so the Markdown list
    // stays contiguous instead of being broken up by blank lines.
    const endpoints = ['- Health Check: `GET /health`'];
    if (modules.includes('chat')) {
        endpoints.push(
            '- Chat: `POST /api/chat`',
            '- Streaming: `POST /api/chat/stream`',
            '- Models: `GET /api/models`',
            '- Moderation: `POST /api/moderate`',
        );
    }
    if (modules.includes('rag')) {
        endpoints.push(
            '- Add Documents: `POST /api/rag/documents`',
            '- Search: `POST /api/rag/search`',
            '- RAG Chat: `POST /api/rag/chat`',
            '- Stats: `GET /api/rag/stats`',
            '- Embeddings: `POST /api/embeddings`',
        );
    }
    if (modules.includes('guardrails')) {
        endpoints.push(
            '- PII Detection: `POST /api/guardrails/pii/detect`',
            '- PII Redaction: `POST /api/guardrails/pii/redact`',
            '- Toxicity: `POST /api/guardrails/toxicity`',
            '- Injection Detection: `POST /api/guardrails/injection`',
        );
    }
    if (tools.length > 0) {
        endpoints.push('- Tools: `POST /api/tools/*`');
    }
    // NOTE: inside this template `\\` renders as a single backslash, so the
    // Windows activation path below comes out as `.venv\Scripts\activate`.
    return `# ChimerAI AI Service

Auto-generated by ChimerAI CLI. This is a standalone FastAPI service that provides AI capabilities for your project.

## Installed Modules

${moduleList}

## Installed Tools

${toolList}

## Quick Start

### Option 1: Local (recommended for development)

\`\`\`bash
cd services/ai
python -m venv .venv

# Activate virtual environment
# Windows:
.venv\\Scripts\\activate
# macOS/Linux:
source .venv/bin/activate

pip install -r requirements.txt
uvicorn main:app --reload --port 8002
\`\`\`

### Option 2: Docker

\`\`\`bash
docker compose up ai-service
\`\`\`

### Option 3: ChimerAI Dev (starts everything)

\`\`\`bash
chimerai dev
\`\`\`

## Endpoints

${endpoints.join('\n')}

## Configuration

Environment variables (set in \`.env\` or \`.env.local\`):

| Variable | Default | Description |
|----------|---------|-------------|
| \`OPENAI_API_KEY\` | — | OpenAI API key (fallback) |
| \`FRONTEND_URL\` | http://localhost:3000 | Next.js frontend URL |
| \`INTERNAL_API_TOKEN\` | — | Token for Frontend ↔ AI Service communication |
| \`INTERNAL_SERVICE_TOKEN\` | — | Bearer token required on incoming requests (auth is skipped when unset) |

## Python Version

Requires **Python 3.11+**.
`;
}
|
|
2179
|
+
/**
 * Generate the Docker Compose `ai-service` block to be appended to the
 * project's compose file.
 *
 * The build context is `./services/ai`, because the generated Dockerfile
 * copies `requirements.txt` and the service sources from the context root
 * and starts `main:app` from `/app`. With the project root as context those
 * files would not be found.
 *
 * `\${...}` sequences are escaped so they reach the YAML verbatim and are
 * substituted by Docker Compose, not by JavaScript.
 *
 * @returns {string} YAML fragment, indented for placement under `services:`.
 */
function generateDockerComposeAiService() {
    return `
  ai-service:
    build:
      context: ./services/ai
      dockerfile: Dockerfile
    container_name: \${COMPOSE_PROJECT_NAME:-chimerai}-ai-service
    restart: unless-stopped
    ports:
      - "\${AI_SERVICE_PORT:-8002}:8002"
    environment:
      - FRONTEND_URL=http://app:3000
      - INTERNAL_API_TOKEN=\${INTERNAL_API_TOKEN}
      - OPENAI_API_KEY=\${OPENAI_API_KEY}
      - REDIS_URL=redis://redis:6379
    volumes:
      - ai_data:/app/data
    networks:
      - chimerai-network
`;
}
|