@chimerai/cli 0.2.73
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +293 -0
- package/dist/cli.d.ts +7 -0
- package/dist/cli.d.ts.map +1 -0
- package/dist/cli.js +317 -0
- package/dist/commands/add.d.ts +11 -0
- package/dist/commands/add.d.ts.map +1 -0
- package/dist/commands/add.js +2126 -0
- package/dist/commands/create.d.ts +12 -0
- package/dist/commands/create.d.ts.map +1 -0
- package/dist/commands/create.js +1703 -0
- package/dist/commands/deploy.d.ts +11 -0
- package/dist/commands/deploy.d.ts.map +1 -0
- package/dist/commands/deploy.js +219 -0
- package/dist/commands/dev.d.ts +17 -0
- package/dist/commands/dev.d.ts.map +1 -0
- package/dist/commands/dev.js +206 -0
- package/dist/commands/doctor.d.ts +11 -0
- package/dist/commands/doctor.d.ts.map +1 -0
- package/dist/commands/doctor.js +728 -0
- package/dist/commands/generate.d.ts +19 -0
- package/dist/commands/generate.d.ts.map +1 -0
- package/dist/commands/generate.js +429 -0
- package/dist/commands/init.d.ts +11 -0
- package/dist/commands/init.d.ts.map +1 -0
- package/dist/commands/init.js +269 -0
- package/dist/commands/list.d.ts +12 -0
- package/dist/commands/list.d.ts.map +1 -0
- package/dist/commands/list.js +328 -0
- package/dist/commands/migrate.d.ts +14 -0
- package/dist/commands/migrate.d.ts.map +1 -0
- package/dist/commands/migrate.js +197 -0
- package/dist/commands/plugin.d.ts +10 -0
- package/dist/commands/plugin.d.ts.map +1 -0
- package/dist/commands/plugin.js +239 -0
- package/dist/commands/remove.d.ts +11 -0
- package/dist/commands/remove.d.ts.map +1 -0
- package/dist/commands/remove.js +472 -0
- package/dist/commands/secret.d.ts +12 -0
- package/dist/commands/secret.d.ts.map +1 -0
- package/dist/commands/secret.js +102 -0
- package/dist/commands/setup.d.ts +9 -0
- package/dist/commands/setup.d.ts.map +1 -0
- package/dist/commands/setup.js +788 -0
- package/dist/commands/update.d.ts +14 -0
- package/dist/commands/update.d.ts.map +1 -0
- package/dist/commands/update.js +211 -0
- package/dist/commands/use.d.ts +9 -0
- package/dist/commands/use.d.ts.map +1 -0
- package/dist/commands/use.js +51 -0
- package/dist/index.d.ts +22 -0
- package/dist/index.d.ts.map +1 -0
- package/dist/index.js +45 -0
- package/dist/license.d.ts +55 -0
- package/dist/license.d.ts.map +1 -0
- package/dist/license.js +258 -0
- package/dist/scanner.d.ts +31 -0
- package/dist/scanner.d.ts.map +1 -0
- package/dist/scanner.js +113 -0
- package/dist/schema-manager.d.ts +26 -0
- package/dist/schema-manager.d.ts.map +1 -0
- package/dist/schema-manager.js +132 -0
- package/dist/templates/admin.d.ts +49 -0
- package/dist/templates/admin.d.ts.map +1 -0
- package/dist/templates/admin.js +1358 -0
- package/dist/templates/ai-routes.d.ts +17 -0
- package/dist/templates/ai-routes.d.ts.map +1 -0
- package/dist/templates/ai-routes.js +1130 -0
- package/dist/templates/ai-service-tools.d.ts +22 -0
- package/dist/templates/ai-service-tools.d.ts.map +1 -0
- package/dist/templates/ai-service-tools.js +1424 -0
- package/dist/templates/ai-service.d.ts +66 -0
- package/dist/templates/ai-service.d.ts.map +1 -0
- package/dist/templates/ai-service.js +2202 -0
- package/dist/templates/api-routes.d.ts +108 -0
- package/dist/templates/api-routes.d.ts.map +1 -0
- package/dist/templates/api-routes.js +1219 -0
- package/dist/templates/auth.d.ts +48 -0
- package/dist/templates/auth.d.ts.map +1 -0
- package/dist/templates/auth.js +381 -0
- package/dist/templates/billing.d.ts +44 -0
- package/dist/templates/billing.d.ts.map +1 -0
- package/dist/templates/billing.js +551 -0
- package/dist/templates/chat.d.ts +63 -0
- package/dist/templates/chat.d.ts.map +1 -0
- package/dist/templates/chat.js +1979 -0
- package/dist/templates/components.d.ts +22 -0
- package/dist/templates/components.d.ts.map +1 -0
- package/dist/templates/components.js +672 -0
- package/dist/templates/config.d.ts +6 -0
- package/dist/templates/config.d.ts.map +1 -0
- package/dist/templates/config.js +86 -0
- package/dist/templates/docker.d.ts +25 -0
- package/dist/templates/docker.d.ts.map +1 -0
- package/dist/templates/docker.js +165 -0
- package/dist/templates/gdpr.d.ts +16 -0
- package/dist/templates/gdpr.d.ts.map +1 -0
- package/dist/templates/gdpr.js +259 -0
- package/dist/templates/index.d.ts +77 -0
- package/dist/templates/index.d.ts.map +1 -0
- package/dist/templates/index.js +339 -0
- package/dist/templates/layout.d.ts +67 -0
- package/dist/templates/layout.d.ts.map +1 -0
- package/dist/templates/layout.js +670 -0
- package/dist/templates/mfa.d.ts +23 -0
- package/dist/templates/mfa.d.ts.map +1 -0
- package/dist/templates/mfa.js +353 -0
- package/dist/templates/middleware.d.ts +12 -0
- package/dist/templates/middleware.d.ts.map +1 -0
- package/dist/templates/middleware.js +116 -0
- package/dist/templates/prisma.d.ts +35 -0
- package/dist/templates/prisma.d.ts.map +1 -0
- package/dist/templates/prisma.js +724 -0
- package/dist/templates/provider-routes.d.ts +21 -0
- package/dist/templates/provider-routes.d.ts.map +1 -0
- package/dist/templates/provider-routes.js +1203 -0
- package/dist/templates/rag.d.ts +48 -0
- package/dist/templates/rag.d.ts.map +1 -0
- package/dist/templates/rag.js +532 -0
- package/dist/templates/widget.d.ts +64 -0
- package/dist/templates/widget.d.ts.map +1 -0
- package/dist/templates/widget.js +1360 -0
- package/dist/utils/provider-db.d.ts +63 -0
- package/dist/utils/provider-db.d.ts.map +1 -0
- package/dist/utils/provider-db.js +300 -0
- package/dist/utils.d.ts +78 -0
- package/dist/utils.d.ts.map +1 -0
- package/dist/utils.js +330 -0
- package/package.json +60 -0
|
@@ -0,0 +1,1424 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
/**
|
|
3
|
+
* AI Service Tool File Generators
|
|
4
|
+
*
|
|
5
|
+
* Embeds the 9 Python tool files as string literals.
|
|
6
|
+
* Each function returns the complete Python source file for a specific tool.
|
|
7
|
+
* Used by `chimerai add ai-tools` with interactive selection.
|
|
8
|
+
*/
|
|
9
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
10
|
+
exports.TOOL_GENERATORS = void 0;
|
|
11
|
+
exports.generateWebTools = generateWebTools;
|
|
12
|
+
exports.generateDocumentTools = generateDocumentTools;
|
|
13
|
+
exports.generateCodeTools = generateCodeTools;
|
|
14
|
+
exports.generateNlpTools = generateNlpTools;
|
|
15
|
+
exports.generateVisionTools = generateVisionTools;
|
|
16
|
+
exports.generateGoogleSheetsTools = generateGoogleSheetsTools;
|
|
17
|
+
exports.generateAirtableTools = generateAirtableTools;
|
|
18
|
+
exports.generateDeeplTools = generateDeeplTools;
|
|
19
|
+
exports.generateWebhookTools = generateWebhookTools;
|
|
20
|
+
exports.generateToolsInit = generateToolsInit;
|
|
21
|
+
// ============================================================================
|
|
22
|
+
// Web Tools — DuckDuckGo search + web scraping
|
|
23
|
+
// ============================================================================
|
|
24
|
+
/**
 * Build the Python source for the web tools module.
 *
 * The returned text defines `WebScraperTool` (fetches a page with httpx or
 * Playwright and parses it with BeautifulSoup) and `WebSearchTool` (DuckDuckGo
 * text and news search through `ddgs`), plus the module-level singletons
 * `web_scraper` and `web_search`.
 *
 * Backslashes that must reach the Python file are doubled because the text
 * lives inside a JS template literal.
 *
 * @returns {string} Complete contents of the generated Python file.
 */
function generateWebTools() {
    return `"""Web Scraping and Search Tools. Auto-generated by ChimerAI CLI."""

import asyncio
from typing import Optional, Dict, Any, List
import structlog
import httpx
from bs4 import BeautifulSoup
from markdownify import markdownify as md
from ddgs import DDGS

logger = structlog.get_logger()


class WebScraperTool:
    """Web scraping tool for extracting content from URLs."""

    def __init__(self):
        self.timeout = 30.0
        self.max_content_length = 1_000_000

    async def scrape_url(
        self,
        url: str,
        format: str = "markdown",
        render_js: bool = False,
    ) -> Dict[str, Any]:
        """Scrape content from a URL."""
        try:
            logger.info("scraping_url", url=url, format=format, render_js=render_js)
            if render_js:
                return await self._scrape_with_playwright(url, format)
            else:
                return await self._scrape_with_httpx(url, format)
        except Exception as e:
            logger.error("scrape_error", url=url, error=str(e))
            raise

    async def _scrape_with_httpx(self, url: str, format: str) -> Dict[str, Any]:
        async with httpx.AsyncClient(timeout=self.timeout) as client:
            response = await client.get(url, follow_redirects=True)
            response.raise_for_status()
            if len(response.content) > self.max_content_length:
                raise ValueError(f"Content too large: {len(response.content)} bytes")
            soup = BeautifulSoup(response.text, 'lxml')
            return self._extract_content(soup, url, format)

    async def _scrape_with_playwright(self, url: str, format: str) -> Dict[str, Any]:
        from playwright.async_api import async_playwright
        async with async_playwright() as p:
            browser = await p.chromium.launch(headless=True)
            try:
                # Opened inside the try block so the browser is closed even
                # when creating the page fails.
                page = await browser.new_page()
                await page.goto(url, wait_until='networkidle')
                content = await page.content()
                soup = BeautifulSoup(content, 'lxml')
                result = self._extract_content(soup, url, format)
            finally:
                await browser.close()
            return result

    def _extract_content(self, soup, url: str, format: str) -> Dict[str, Any]:
        for script in soup(["script", "style", "nav", "footer", "header"]):
            script.decompose()

        # Tag.string is None when <title> has nested markup; keep the field a string.
        title = (soup.title.string or "") if soup.title else ""
        main_content = soup.find('main') or soup.find('article') or soup.body
        if not main_content:
            main_content = soup

        metadata = {
            "title": title,
            "url": url,
            "description": self._get_meta_tag(soup, "description"),
            "keywords": self._get_meta_tag(soup, "keywords"),
            "author": self._get_meta_tag(soup, "author"),
        }

        links = []
        for link in soup.find_all('a', href=True):
            href = link['href']
            if href.startswith('http'):
                links.append({"text": link.get_text(strip=True), "url": href})

        if format == "markdown":
            content = md(str(main_content), heading_style="ATX")
        elif format == "text":
            content = main_content.get_text(separator='\\n', strip=True)
        else:
            content = str(main_content)

        return {
            "content": content,
            "metadata": metadata,
            "links": links[:50],
            "word_count": len(content.split()),
        }

    def _get_meta_tag(self, soup, name: str) -> Optional[str]:
        tag = soup.find("meta", attrs={"name": name}) or \\
            soup.find("meta", attrs={"property": f"og:{name}"})
        return tag.get("content") if tag else None

    async def extract_links(self, url: str) -> List[str]:
        result = await self.scrape_url(url, format="html")
        return [link["url"] for link in result["links"]]


class WebSearchTool:
    """Web search tool using DuckDuckGo."""

    def __init__(self):
        self.ddgs = DDGS()

    async def search(
        self,
        query: str,
        max_results: int = 10,
        region: str = "wt-wt",
        safesearch: str = "moderate",
    ) -> List[Dict[str, Any]]:
        """Search the web using DuckDuckGo."""
        try:
            logger.info("web_search", query=query, max_results=max_results)
            # The DDGS call is synchronous, so it runs in the default executor
            # of the loop that is executing this coroutine.
            loop = asyncio.get_running_loop()
            results = await loop.run_in_executor(
                None,
                lambda: list(self.ddgs.text(
                    query, region=region, safesearch=safesearch, max_results=max_results
                ))
            )
            formatted = [
                {
                    "position": i + 1,
                    "title": result.get("title", ""),
                    "url": result.get("href", ""),
                    "snippet": result.get("body", ""),
                }
                for i, result in enumerate(results)
            ]
            logger.info("search_completed", results_count=len(formatted))
            return formatted
        except Exception as e:
            logger.error("search_error", query=query, error=str(e))
            raise

    async def search_news(self, query: str, max_results: int = 10, region: str = "wt-wt") -> List[Dict[str, Any]]:
        """Search news articles."""
        try:
            loop = asyncio.get_running_loop()
            results = await loop.run_in_executor(
                None,
                lambda: list(self.ddgs.news(query, region=region, max_results=max_results))
            )
            return [
                {
                    "position": i + 1,
                    "title": r.get("title", ""),
                    "url": r.get("url", ""),
                    "snippet": r.get("body", ""),
                    "source": r.get("source", ""),
                    "date": r.get("date", ""),
                }
                for i, r in enumerate(results)
            ]
        except Exception as e:
            logger.error("news_search_error", query=query, error=str(e))
            raise


web_scraper = WebScraperTool()
web_search = WebSearchTool()
`;
}
|
|
198
|
+
// ============================================================================
|
|
199
|
+
// Document Tools — PDF, DOCX, XLSX, PPTX extraction
|
|
200
|
+
// ============================================================================
|
|
201
|
+
/**
 * Build the Python source for the document tools module.
 *
 * The returned text defines `PDFTool` (pdfplumber text/table extraction with an
 * optional pytesseract OCR fallback) and `DocumentTool` (DOCX, XLSX, PPTX and
 * plain-text extraction with suffix-based dispatch), plus the module-level
 * singletons `pdf_tool` and `document_tool`.
 *
 * Backslashes that must reach the Python file are doubled because the text
 * lives inside a JS template literal.
 *
 * @returns {string} Complete contents of the generated Python file.
 */
function generateDocumentTools() {
    return `"""Document Processing Tools. Auto-generated by ChimerAI CLI."""

from typing import Dict, Any, List, Optional
from pathlib import Path
import structlog

logger = structlog.get_logger()


class PDFTool:
    """PDF processing tool — text, table, and OCR extraction."""

    def __init__(self):
        self.max_pages = 1000

    async def extract_text(
        self, file_path: str, use_ocr: bool = False, page_range: Optional[tuple] = None
    ) -> Dict[str, Any]:
        """Extract text per page.

        page_range is an optional (start, stop) pair applied as a zero-based
        slice over the PDF pages. Reported page numbers are 1-based positions
        in the whole document, not positions inside the slice.
        """
        try:
            import pdfplumber
            logger.info("extracting_pdf", file=file_path, ocr=use_ocr)
            with pdfplumber.open(file_path) as pdf:
                metadata = pdf.metadata
                total_pages = len(pdf.pages)
                if total_pages > self.max_pages:
                    raise ValueError(f"PDF too large: {total_pages} pages (max {self.max_pages})")

                # Normalise the slice bounds once (handles None and negative
                # values) so the first selected page keeps its real number.
                first_index = 0
                pages = pdf.pages
                if page_range:
                    first_index, stop_index, _ = slice(page_range[0], page_range[1]).indices(total_pages)
                    pages = pdf.pages[first_index:stop_index]
                page_texts = []
                for page_number, page in enumerate(pages, start=first_index + 1):
                    text = page.extract_text()
                    if use_ocr and (not text or len(text.strip()) < 10):
                        text = await self._ocr_page(page)
                    page_texts.append({
                        "page_number": page_number,
                        "text": text or "",
                        "word_count": len(text.split()) if text else 0,
                    })

                full_text = "\\n\\n".join(
                    f"--- Page {p['page_number']} ---\\n{p['text']}" for p in page_texts
                )
                return {
                    "text": full_text,
                    "pages": page_texts,
                    "total_pages": total_pages,
                    "metadata": metadata,
                    "word_count": sum(p["word_count"] for p in page_texts),
                }
        except Exception as e:
            logger.error("pdf_extraction_error", file=file_path, error=str(e))
            raise

    async def extract_tables(self, file_path: str) -> List[List[List[str]]]:
        try:
            import pdfplumber
            tables = []
            with pdfplumber.open(file_path) as pdf:
                for page in pdf.pages:
                    page_tables = page.extract_tables()
                    if page_tables:
                        tables.extend(page_tables)
            logger.info("tables_extracted", count=len(tables))
            return tables
        except Exception as e:
            logger.error("table_extraction_error", error=str(e))
            raise

    async def _ocr_page(self, page) -> str:
        # Best effort: an OCR failure is logged and yields an empty string.
        try:
            import pytesseract
            image = page.to_image(resolution=300).original
            return pytesseract.image_to_string(image)
        except Exception as e:
            logger.warning("ocr_failed", error=str(e))
            return ""


class DocumentTool:
    """Multi-format document processing: DOCX, XLSX, PPTX, TXT."""

    async def extract_docx(self, file_path: str) -> Dict[str, Any]:
        try:
            from docx import Document
            doc = Document(file_path)
            paragraphs = [p.text for p in doc.paragraphs if p.text.strip()]
            tables = []
            for table in doc.tables:
                table_data = [[cell.text for cell in row.cells] for row in table.rows]
                tables.append(table_data)
            full_text = "\\n\\n".join(paragraphs)
            return {"text": full_text, "paragraphs": paragraphs, "tables": tables, "word_count": len(full_text.split())}
        except Exception as e:
            logger.error("docx_extraction_error", error=str(e))
            raise

    async def extract_xlsx(self, file_path: str) -> Dict[str, Any]:
        try:
            import openpyxl
            wb = openpyxl.load_workbook(file_path, data_only=True)
            sheets = {}
            for sheet_name in wb.sheetnames:
                sheet = wb[sheet_name]
                data = [list(row) for row in sheet.iter_rows(values_only=True)]
                sheets[sheet_name] = data
            return {"sheets": sheets, "sheet_names": wb.sheetnames}
        except Exception as e:
            logger.error("xlsx_extraction_error", error=str(e))
            raise

    async def extract_pptx(self, file_path: str) -> Dict[str, Any]:
        try:
            from pptx import Presentation
            prs = Presentation(file_path)
            slides = []
            for i, slide in enumerate(prs.slides):
                slide_text = [shape.text for shape in slide.shapes if hasattr(shape, "text")]
                slides.append({"slide_number": i + 1, "text": "\\n".join(slide_text)})
            full_text = "\\n\\n".join(f"--- Slide {s['slide_number']} ---\\n{s['text']}" for s in slides)
            return {"text": full_text, "slides": slides, "total_slides": len(slides)}
        except Exception as e:
            logger.error("pptx_extraction_error", error=str(e))
            raise

    async def extract_text_file(self, file_path: str) -> Dict[str, Any]:
        try:
            with open(file_path, 'r', encoding='utf-8') as f:
                text = f.read()
            return {"text": text, "word_count": len(text.split()), "line_count": len(text.splitlines())}
        except Exception as e:
            logger.error("text_extraction_error", error=str(e))
            raise

    async def process_file(self, file_path: str) -> Dict[str, Any]:
        """Dispatch to the extractor that matches the file suffix."""
        suffix = Path(file_path).suffix.lower()
        if suffix == '.pdf':
            return await PDFTool().extract_text(file_path)
        elif suffix == '.docx':
            return await self.extract_docx(file_path)
        elif suffix == '.xlsx':
            return await self.extract_xlsx(file_path)
        elif suffix == '.pptx':
            return await self.extract_pptx(file_path)
        elif suffix in ['.txt', '.md', '.csv']:
            return await self.extract_text_file(file_path)
        else:
            raise ValueError(f"Unsupported file type: {suffix}")


pdf_tool = PDFTool()
document_tool = DocumentTool()
`;
}
|
|
355
|
+
// ============================================================================
|
|
356
|
+
// Code Tools — Sandboxed Python execution
|
|
357
|
+
// ============================================================================
|
|
358
|
+
/**
 * Build the Python source for the code-execution tool module.
 *
 * The returned text defines `CodeInterpreterTool`, which compiles user code
 * with RestrictedPython and executes it against a curated set of builtins and
 * pre-imported modules, plus the module-level singleton `code_interpreter`.
 *
 * Backslashes that must reach the Python file are doubled because the text
 * lives inside a JS template literal.
 *
 * @returns {string} Complete contents of the generated Python file.
 */
function generateCodeTools() {
    return `"""Sandboxed Code Execution Tool. Auto-generated by ChimerAI CLI."""

import sys
import io
import time
import traceback
from typing import Dict, Any, Optional
import structlog
from RestrictedPython import compile_restricted, safe_globals, safe_builtins, PrintCollector
from RestrictedPython.Eval import default_guarded_getitem

import os

logger = structlog.get_logger()

MAX_CODE_LENGTH = int(os.environ.get("CODE_MAX_LENGTH", 2000))
MAX_OUTPUT_LENGTH = int(os.environ.get("CODE_MAX_OUTPUT", 10_000))
MAX_EXECUTION_TIME = int(os.environ.get("CODE_MAX_EXECUTION_TIME", 10))

BLOCKED_IMPORTS = {
    "os", "sys", "subprocess", "socket", "requests", "urllib",
    "http", "ftplib", "smtplib", "importlib", "builtins",
    "shutil", "pathlib", "glob", "io", "tempfile", "pickle",
    "ctypes", "threading", "multiprocessing", "signal",
}

ALLOWED_MODULES = {
    "math", "random", "json", "datetime", "re", "csv", "statistics",
    "collections", "itertools", "functools", "string",
}


def _safe_import(name: str, *args, **kwargs):
    if name in BLOCKED_IMPORTS:
        raise ImportError(f"Import of '{name}' is not allowed in the sandbox.")
    if name not in ALLOWED_MODULES:
        raise ImportError(f"Import of '{name}' is not available in the sandbox.")
    return __import__(name, *args, **kwargs)


def _check_public_name(name):
    """Reject attribute names that start with an underscore."""
    if not isinstance(name, str) or name.startswith("_"):
        raise AttributeError(f"Access to private attribute {name!r} is denied in the sandbox.")


def _safe_getattr(obj, name, *default):
    """getattr() for sandboxed code that refuses underscore-prefixed names.

    RestrictedPython rejects 'obj._x' syntax at compile time; exposing the raw
    builtin would let getattr(obj, "_x") reach the same attributes.
    """
    _check_public_name(name)
    return getattr(obj, name, *default)


def _safe_setattr(obj, name, value):
    """setattr() for sandboxed code that refuses underscore-prefixed names."""
    _check_public_name(name)
    setattr(obj, name, value)


def _safe_vars(obj):
    """vars() for sandboxed code: a copy without underscore-prefixed entries."""
    return {k: v for k, v in vars(obj).items() if not str(k).startswith("_")}


def _safe_unpack_sequence_(it, spec, getiter=iter):
    """RestrictedPython guard for unpacking ONE sequence, e.g. 'a, (b, c) = value'.

    spec is the dict generated by RestrictedPython: 'min_len' is the minimum
    number of items and 'childs' is a list of (index, child_spec) pairs for
    nested targets. Returns the materialised list; Python then unpacks it.
    """
    if isinstance(spec, dict):
        childs = spec.get('childs', ())
        min_len = spec.get('min_len', 0)
    else:
        childs = ()
        min_len = spec if isinstance(spec, int) else 0
    getiter = getiter if callable(getiter) else iter
    result = list(getiter(it))
    if len(result) < min_len:
        raise ValueError(f"not enough values to unpack (expected {min_len}, got {len(result)})")
    # Only the positions named in 'childs' are nested targets; other items must
    # be passed through untouched (they may not be iterable at all).
    for idx, child_spec in childs:
        result[idx] = _safe_unpack_sequence_(result[idx], child_spec, getiter)
    return result


def _safe_iter_unpack_sequence_(it, spec, getiter=iter):
    """RestrictedPython guard for 'for a, b in it': spec applies to EACH item."""
    getiter = getiter if callable(getiter) else iter
    for item in getiter(it):
        yield _safe_unpack_sequence_(item, spec, getiter)


def _safe_inplacevar_(op: str, x, y):
    """RestrictedPython guard for augmented assignment (+=, -=, etc.)."""
    ops = {
        '+=': lambda a, b: a + b, '-=': lambda a, b: a - b,
        '*=': lambda a, b: a * b, '/=': lambda a, b: a / b,
        '//=': lambda a, b: a // b, '%=': lambda a, b: a % b,
        '**=': lambda a, b: a ** b, '&=': lambda a, b: a & b,
        '|=': lambda a, b: a | b, '^=': lambda a, b: a ^ b,
        '>>=': lambda a, b: a >> b, '<<=': lambda a, b: a << b,
    }
    if op not in ops:
        raise ValueError(f"Operator '{op}' is not allowed.")
    return ops[op](x, y)


class CodeInterpreterTool:
    """Sandboxed Python code execution tool."""

    async def execute(self, code: str, timeout: int = 5) -> Dict[str, Any]:
        """Compile and run user code, returning success/output/error/executionTime.

        NOTE(review): 'timeout' and MAX_EXECUTION_TIME are not enforced here;
        exec() runs synchronously on the calling thread. The substring check
        below only catches the most literal infinite loops. The guards in this
        module narrow what code can reach but are not a complete isolation
        boundary; run untrusted code in an isolated process or container.
        """
        if len(code) > MAX_CODE_LENGTH:
            return {"success": False, "output": None,
                    "error": f"Code exceeds {MAX_CODE_LENGTH} character limit.", "executionTime": 0}

        if "while True" in code or "while 1" in code:
            return {"success": False, "output": None,
                    "error": "Infinite loops are not allowed in the sandbox.", "executionTime": 0}

        stdout_buffer = io.StringIO()
        old_stdout = sys.stdout
        start = None
        try:
            byte_code = compile_restricted(code, filename="<user_code>", mode="exec")
            env = safe_globals.copy()
            env["__builtins__"] = safe_builtins.copy()
            env["__builtins__"]["__import__"] = _safe_import
            env["__builtins__"].update({
                "range": range, "len": len, "list": list, "dict": dict,
                "set": set, "tuple": tuple, "int": int, "float": float,
                "str": str, "bool": bool, "abs": abs, "min": min, "max": max,
                "sum": sum, "round": round, "sorted": sorted, "reversed": reversed,
                "enumerate": enumerate, "zip": zip, "map": map, "filter": filter,
                "any": any, "all": all, "isinstance": isinstance, "type": type,
                "repr": repr, "hex": hex, "oct": oct, "bin": bin,
                "ord": ord, "chr": chr, "format": format,
                "hasattr": hasattr, "getattr": _safe_getattr, "setattr": _safe_setattr,
                "vars": _safe_vars, "dir": dir, "super": super,
            })
            env["_print_"] = PrintCollector
            env["_getiter_"] = iter
            env["_getitem_"] = default_guarded_getitem
            env["_iter_unpack_sequence_"] = _safe_iter_unpack_sequence_
            env["_unpack_sequence_"] = _safe_unpack_sequence_
            env["_write_"] = lambda x: x
            env["_inplacevar_"] = _safe_inplacevar_
            env["printed"] = ""

            import math, random, json, datetime, re, csv, collections, itertools, functools, string, statistics
            for mod in [math, random, json, datetime, re, csv, collections, itertools, functools, string, statistics]:
                env[mod.__name__] = mod

            sys.stdout = stdout_buffer
            start = time.perf_counter()
            exec(byte_code, env)
            elapsed_ms = int((time.perf_counter() - start) * 1000)

            output = env["_print"]() if "_print" in env and callable(env["_print"]) else stdout_buffer.getvalue()
            if len(output) > MAX_OUTPUT_LENGTH:
                output = output[:MAX_OUTPUT_LENGTH] + "\\n... [truncated]"

            return {"success": True, "output": output, "error": None, "executionTime": elapsed_ms}

        except Exception as e:
            # Report the time actually spent when the failure happened while
            # the user code was running; 0 when it failed before execution.
            elapsed_ms = int((time.perf_counter() - start) * 1000) if start is not None else 0
            msg = str(e)
            if "not allowed" in msg or "not available" in msg:
                msg += "\\nConfigure allowed modules in your deployment."
            return {"success": False, "output": stdout_buffer.getvalue() or None,
                    "error": msg, "executionTime": elapsed_ms}
        finally:
            sys.stdout = old_stdout

    # Backwards-compatible alias
    async def execute_code(self, code: str, inputs: Optional[Dict[str, Any]] = None) -> Dict[str, Any]:
        result = await self.execute(code)
        return {"success": result["success"], "output": result["output"],
                "error": result["error"], "result": None}


code_interpreter = CodeInterpreterTool()
`;
}
|
|
512
|
+
// ============================================================================
|
|
513
|
+
// NLP Tools — Summarize, Sentiment, Extract, Classify, Chunk, Q&A
|
|
514
|
+
// NOTE: Regex backslashes are double-escaped for JS template literals.
|
|
515
|
+
// ============================================================================
|
|
516
|
+
function generateNlpTools() {
|
|
517
|
+
return `"""NLP Tools — Summarization, Sentiment, Extraction, Classification, Q&A.
|
|
518
|
+
Auto-generated by ChimerAI CLI."""
|
|
519
|
+
|
|
520
|
+
from typing import List, Dict, Any, Optional, Literal
|
|
521
|
+
import structlog
|
|
522
|
+
from litellm import acompletion
|
|
523
|
+
import tiktoken
|
|
524
|
+
|
|
525
|
+
logger = structlog.get_logger()
|
|
526
|
+
|
|
527
|
+
|
|
528
|
+
class SummarizationTool:
|
|
529
|
+
"""Summarizes text using different strategies."""
|
|
530
|
+
|
|
531
|
+
def __init__(self, model: str = "gpt-3.5-turbo"):
|
|
532
|
+
self.model = model
|
|
533
|
+
self.encoding = tiktoken.encoding_for_model("gpt-3.5-turbo")
|
|
534
|
+
|
|
535
|
+
def count_tokens(self, text: str) -> int:
|
|
536
|
+
return len(self.encoding.encode(text))
|
|
537
|
+
|
|
538
|
+
async def summarize(
|
|
539
|
+
self,
|
|
540
|
+
text: str,
|
|
541
|
+
mode: Literal["concise", "detailed", "bullet_points", "key_points"] = "concise",
|
|
542
|
+
max_length: Optional[int] = None,
|
|
543
|
+
language: str = "en",
|
|
544
|
+
) -> Dict[str, Any]:
|
|
545
|
+
try:
|
|
546
|
+
logger.info("summarizing_text", mode=mode, text_length=len(text))
|
|
547
|
+
mode_prompts = {
|
|
548
|
+
"concise": "Provide a concise summary of the following text in 2-3 sentences:",
|
|
549
|
+
"detailed": "Provide a detailed summary covering all main points:",
|
|
550
|
+
"bullet_points": "Summarize the following text as bullet points (5-7 points):",
|
|
551
|
+
"key_points": "Extract the key points as a numbered list:",
|
|
552
|
+
}
|
|
553
|
+
system_prompt = mode_prompts.get(mode, mode_prompts["concise"])
|
|
554
|
+
if max_length:
|
|
555
|
+
system_prompt += f" Keep the summary under {max_length} words."
|
|
556
|
+
if language != "en":
|
|
557
|
+
system_prompt += f" Provide the summary in {language}."
|
|
558
|
+
|
|
559
|
+
token_count = self.count_tokens(text)
|
|
560
|
+
|
|
561
|
+
if token_count > 12000:
|
|
562
|
+
summary = await self._map_reduce_summarize(text, mode, max_length, language)
|
|
563
|
+
else:
|
|
564
|
+
response = await acompletion(
|
|
565
|
+
model=self.model,
|
|
566
|
+
messages=[
|
|
567
|
+
{"role": "system", "content": system_prompt},
|
|
568
|
+
{"role": "user", "content": text},
|
|
569
|
+
],
|
|
570
|
+
temperature=0.3,
|
|
571
|
+
)
|
|
572
|
+
summary = response.choices[0].message.content
|
|
573
|
+
|
|
574
|
+
return {
|
|
575
|
+
"summary": summary,
|
|
576
|
+
"mode": mode,
|
|
577
|
+
"original_length": len(text),
|
|
578
|
+
"summary_length": len(summary),
|
|
579
|
+
"compression_ratio": round(len(summary) / len(text), 2),
|
|
580
|
+
"token_count": token_count,
|
|
581
|
+
}
|
|
582
|
+
except Exception as e:
|
|
583
|
+
logger.error("summarization_failed", error=str(e))
|
|
584
|
+
raise
|
|
585
|
+
|
|
586
|
+
async def _map_reduce_summarize(self, text, mode, max_length, language):
|
|
587
|
+
chunks = self._split_into_chunks(text, chunk_size=10000)
|
|
588
|
+
chunk_summaries = []
|
|
589
|
+
for i, chunk in enumerate(chunks):
|
|
590
|
+
response = await acompletion(
|
|
591
|
+
model=self.model,
|
|
592
|
+
messages=[
|
|
593
|
+
{"role": "system", "content": "Summarize this text section concisely:"},
|
|
594
|
+
{"role": "user", "content": chunk},
|
|
595
|
+
],
|
|
596
|
+
temperature=0.3,
|
|
597
|
+
)
|
|
598
|
+
chunk_summaries.append(response.choices[0].message.content)
|
|
599
|
+
|
|
600
|
+
combined = "\\n\\n".join(chunk_summaries)
|
|
601
|
+
final_prompt = f"Combine these summaries into a {mode} final summary:"
|
|
602
|
+
if max_length:
|
|
603
|
+
final_prompt += f" Keep under {max_length} words."
|
|
604
|
+
if language != "en":
|
|
605
|
+
final_prompt += f" Provide in {language}."
|
|
606
|
+
|
|
607
|
+
response = await acompletion(
|
|
608
|
+
model=self.model,
|
|
609
|
+
messages=[
|
|
610
|
+
{"role": "system", "content": final_prompt},
|
|
611
|
+
{"role": "user", "content": combined},
|
|
612
|
+
],
|
|
613
|
+
temperature=0.3,
|
|
614
|
+
)
|
|
615
|
+
return response.choices[0].message.content
|
|
616
|
+
|
|
617
|
+
def _split_into_chunks(self, text: str, chunk_size: int = 10000) -> List[str]:
|
|
618
|
+
chunks = []
|
|
619
|
+
words = text.split()
|
|
620
|
+
current_chunk = []
|
|
621
|
+
current_length = 0
|
|
622
|
+
for word in words:
|
|
623
|
+
word_length = len(word) + 1
|
|
624
|
+
if current_length + word_length > chunk_size and current_chunk:
|
|
625
|
+
chunks.append(" ".join(current_chunk))
|
|
626
|
+
current_chunk = [word]
|
|
627
|
+
current_length = word_length
|
|
628
|
+
else:
|
|
629
|
+
current_chunk.append(word)
|
|
630
|
+
current_length += word_length
|
|
631
|
+
if current_chunk:
|
|
632
|
+
chunks.append(" ".join(current_chunk))
|
|
633
|
+
return chunks
|
|
634
|
+
|
|
635
|
+
|
|
636
|
+
class SentimentAnalysisTool:
|
|
637
|
+
"""Analyzes sentiment and emotions in text."""
|
|
638
|
+
|
|
639
|
+
def __init__(self, model: str = "gpt-3.5-turbo"):
|
|
640
|
+
self.model = model
|
|
641
|
+
|
|
642
|
+
async def analyze_sentiment(
|
|
643
|
+
self, text: str, include_emotions: bool = True, include_aspects: bool = False
|
|
644
|
+
) -> Dict[str, Any]:
|
|
645
|
+
try:
|
|
646
|
+
logger.info("analyzing_sentiment", text_length=len(text))
|
|
647
|
+
system_prompt = "Analyze the sentiment. Provide: 1. Overall sentiment (positive/negative/neutral) 2. Confidence (0-1) 3. Polarity (-1 to 1)"
|
|
648
|
+
if include_emotions:
|
|
649
|
+
system_prompt += " 4. Detected emotions (joy, sadness, anger, fear, surprise, disgust)"
|
|
650
|
+
if include_aspects:
|
|
651
|
+
system_prompt += " 5. Aspect-based sentiment"
|
|
652
|
+
system_prompt += " Respond in JSON format."
|
|
653
|
+
|
|
654
|
+
response = await acompletion(
|
|
655
|
+
model=self.model,
|
|
656
|
+
messages=[
|
|
657
|
+
{"role": "system", "content": system_prompt},
|
|
658
|
+
{"role": "user", "content": text},
|
|
659
|
+
],
|
|
660
|
+
temperature=0.2,
|
|
661
|
+
response_format={"type": "json_object"},
|
|
662
|
+
)
|
|
663
|
+
import json
|
|
664
|
+
result = json.loads(response.choices[0].message.content)
|
|
665
|
+
return {"text": text[:200] + "..." if len(text) > 200 else text, **result}
|
|
666
|
+
except Exception as e:
|
|
667
|
+
logger.error("sentiment_analysis_failed", error=str(e))
|
|
668
|
+
raise
|
|
669
|
+
|
|
670
|
+
|
|
671
|
+
class InformationExtractorTool:
|
|
672
|
+
"""Extracts structured information from unstructured text."""
|
|
673
|
+
|
|
674
|
+
def __init__(self, model: str = "gpt-3.5-turbo-1106"):
|
|
675
|
+
self.model = model
|
|
676
|
+
|
|
677
|
+
async def extract_information(
|
|
678
|
+
self, text: str, schema: Optional[Dict[str, str]] = None, entity_types: Optional[List[str]] = None
|
|
679
|
+
) -> Dict[str, Any]:
|
|
680
|
+
try:
|
|
681
|
+
logger.info("extracting_information", text_length=len(text))
|
|
682
|
+
if schema:
|
|
683
|
+
fields_desc = chr(10).join([f'- {f}: {d}' for f, d in schema.items()])
|
|
684
|
+
system_prompt = f"Extract the following from the text:\\n{fields_desc}\\nProvide in JSON. Use null if not found."
|
|
685
|
+
elif entity_types:
|
|
686
|
+
types_desc = chr(10).join([f'- {t}' for t in entity_types])
|
|
687
|
+
system_prompt = f"Extract these entity types:\\n{types_desc}\\nProvide in JSON with 'entities' key."
|
|
688
|
+
else:
|
|
689
|
+
system_prompt = "Extract all key information (people, organizations, dates, locations, amounts). Provide in JSON with 'entities' key."
|
|
690
|
+
|
|
691
|
+
response = await acompletion(
|
|
692
|
+
model=self.model,
|
|
693
|
+
messages=[
|
|
694
|
+
{"role": "system", "content": system_prompt},
|
|
695
|
+
{"role": "user", "content": text},
|
|
696
|
+
],
|
|
697
|
+
temperature=0.1,
|
|
698
|
+
response_format={"type": "json_object"},
|
|
699
|
+
)
|
|
700
|
+
import json
|
|
701
|
+
extracted = json.loads(response.choices[0].message.content)
|
|
702
|
+
return {**extracted, "source_text_length": len(text), "extraction_type": "schema" if schema else ("entities" if entity_types else "general")}
|
|
703
|
+
except Exception as e:
|
|
704
|
+
logger.error("information_extraction_failed", error=str(e))
|
|
705
|
+
raise
|
|
706
|
+
|
|
707
|
+
|
|
708
|
+
class TextClassifierTool:
|
|
709
|
+
"""Zero-shot text classification."""
|
|
710
|
+
|
|
711
|
+
def __init__(self, model: str = "gpt-3.5-turbo"):
|
|
712
|
+
self.model = model
|
|
713
|
+
|
|
714
|
+
async def classify(
|
|
715
|
+
self, text: str, categories: List[str], multi_label: bool = False, include_confidence: bool = True
|
|
716
|
+
) -> Dict[str, Any]:
|
|
717
|
+
try:
|
|
718
|
+
logger.info("classifying_text", num_categories=len(categories))
|
|
719
|
+
cats_desc = chr(10).join([f'- {c}' for c in categories])
|
|
720
|
+
if multi_label:
|
|
721
|
+
system_prompt = f"Classify into one or more categories:\\n{cats_desc}\\nRespond in JSON with 'categories' list."
|
|
722
|
+
else:
|
|
723
|
+
system_prompt = f"Classify into exactly ONE category:\\n{cats_desc}\\nRespond in JSON with 'category' field."
|
|
724
|
+
|
|
725
|
+
response = await acompletion(
|
|
726
|
+
model=self.model,
|
|
727
|
+
messages=[
|
|
728
|
+
{"role": "system", "content": system_prompt},
|
|
729
|
+
{"role": "user", "content": text},
|
|
730
|
+
],
|
|
731
|
+
temperature=0.2,
|
|
732
|
+
response_format={"type": "json_object"},
|
|
733
|
+
)
|
|
734
|
+
import json
|
|
735
|
+
result = json.loads(response.choices[0].message.content)
|
|
736
|
+
return {"text": text[:200] + "..." if len(text) > 200 else text, "available_categories": categories, "multi_label": multi_label, **result}
|
|
737
|
+
except Exception as e:
|
|
738
|
+
logger.error("text_classification_failed", error=str(e))
|
|
739
|
+
raise
|
|
740
|
+
|
|
741
|
+
|
|
742
|
+
class TextChunkingTool:
|
|
743
|
+
"""Splits text into chunks using various strategies."""
|
|
744
|
+
|
|
745
|
+
def __init__(self):
|
|
746
|
+
self.encoding = tiktoken.encoding_for_model("gpt-3.5-turbo")
|
|
747
|
+
|
|
748
|
+
def chunk_text(
|
|
749
|
+
self,
|
|
750
|
+
text: str,
|
|
751
|
+
strategy: Literal["fixed_size", "sentence", "paragraph", "semantic", "token_based"] = "token_based",
|
|
752
|
+
chunk_size: int = 1000,
|
|
753
|
+
chunk_overlap: int = 200,
|
|
754
|
+
max_chunks: Optional[int] = None,
|
|
755
|
+
) -> Dict[str, Any]:
|
|
756
|
+
try:
|
|
757
|
+
logger.info("chunking_text", strategy=strategy, chunk_size=chunk_size)
|
|
758
|
+
if strategy == "token_based":
|
|
759
|
+
chunks = self._chunk_by_tokens(text, chunk_size, chunk_overlap)
|
|
760
|
+
elif strategy == "sentence":
|
|
761
|
+
chunks = self._chunk_by_sentences(text, chunk_size, chunk_overlap)
|
|
762
|
+
elif strategy == "paragraph":
|
|
763
|
+
chunks = self._chunk_by_paragraphs(text, chunk_size, chunk_overlap)
|
|
764
|
+
elif strategy == "fixed_size":
|
|
765
|
+
chunks = self._chunk_fixed_size(text, chunk_size, chunk_overlap)
|
|
766
|
+
elif strategy == "semantic":
|
|
767
|
+
chunks = self._chunk_semantic(text, chunk_size)
|
|
768
|
+
else:
|
|
769
|
+
raise ValueError(f"Unknown strategy: {strategy}")
|
|
770
|
+
|
|
771
|
+
if max_chunks and len(chunks) > max_chunks:
|
|
772
|
+
chunks = chunks[:max_chunks]
|
|
773
|
+
|
|
774
|
+
chunk_data = [
|
|
775
|
+
{"chunk_id": i, "text": chunk, "char_count": len(chunk), "token_count": len(self.encoding.encode(chunk)) if i < 10 else None}
|
|
776
|
+
for i, chunk in enumerate(chunks)
|
|
777
|
+
]
|
|
778
|
+
return {
|
|
779
|
+
"chunks": chunk_data,
|
|
780
|
+
"total_chunks": len(chunk_data),
|
|
781
|
+
"strategy": strategy,
|
|
782
|
+
"original_length": len(text),
|
|
783
|
+
"average_chunk_size": sum(c["char_count"] for c in chunk_data) // len(chunk_data) if chunk_data else 0,
|
|
784
|
+
}
|
|
785
|
+
except Exception as e:
|
|
786
|
+
logger.error("text_chunking_failed", error=str(e))
|
|
787
|
+
raise
|
|
788
|
+
|
|
789
|
+
def _chunk_by_tokens(self, text, chunk_size, overlap):
|
|
790
|
+
tokens = self.encoding.encode(text)
|
|
791
|
+
chunks = []
|
|
792
|
+
i = 0
|
|
793
|
+
while i < len(tokens):
|
|
794
|
+
chunks.append(self.encoding.decode(tokens[i:i + chunk_size]))
|
|
795
|
+
i += chunk_size - overlap
|
|
796
|
+
return chunks
|
|
797
|
+
|
|
798
|
+
def _chunk_by_sentences(self, text, chunk_size, overlap):
|
|
799
|
+
import re
|
|
800
|
+
sentences = re.split(r'(?<=[.!?])\\s+', text)
|
|
801
|
+
chunks = []
|
|
802
|
+
current_chunk = []
|
|
803
|
+
current_size = 0
|
|
804
|
+
for sentence in sentences:
|
|
805
|
+
if current_size + len(sentence) > chunk_size and current_chunk:
|
|
806
|
+
chunks.append(" ".join(current_chunk))
|
|
807
|
+
overlap_sentences = []
|
|
808
|
+
overlap_size = 0
|
|
809
|
+
for s in reversed(current_chunk):
|
|
810
|
+
if overlap_size + len(s) <= overlap:
|
|
811
|
+
overlap_sentences.insert(0, s)
|
|
812
|
+
overlap_size += len(s)
|
|
813
|
+
else:
|
|
814
|
+
break
|
|
815
|
+
current_chunk = overlap_sentences + [sentence]
|
|
816
|
+
current_size = sum(len(s) for s in current_chunk)
|
|
817
|
+
else:
|
|
818
|
+
current_chunk.append(sentence)
|
|
819
|
+
current_size += len(sentence)
|
|
820
|
+
if current_chunk:
|
|
821
|
+
chunks.append(" ".join(current_chunk))
|
|
822
|
+
return chunks
|
|
823
|
+
|
|
824
|
+
def _chunk_by_paragraphs(self, text, chunk_size, overlap):
|
|
825
|
+
paragraphs = text.split("\\n\\n")
|
|
826
|
+
chunks = []
|
|
827
|
+
current_chunk = []
|
|
828
|
+
current_size = 0
|
|
829
|
+
for para in paragraphs:
|
|
830
|
+
if current_size + len(para) > chunk_size and current_chunk:
|
|
831
|
+
chunks.append("\\n\\n".join(current_chunk))
|
|
832
|
+
current_chunk = [para]
|
|
833
|
+
current_size = len(para)
|
|
834
|
+
else:
|
|
835
|
+
current_chunk.append(para)
|
|
836
|
+
current_size += len(para)
|
|
837
|
+
if current_chunk:
|
|
838
|
+
chunks.append("\\n\\n".join(current_chunk))
|
|
839
|
+
return chunks
|
|
840
|
+
|
|
841
|
+
def _chunk_fixed_size(self, text, chunk_size, overlap):
|
|
842
|
+
chunks = []
|
|
843
|
+
i = 0
|
|
844
|
+
while i < len(text):
|
|
845
|
+
chunks.append(text[i:i + chunk_size])
|
|
846
|
+
i += chunk_size - overlap
|
|
847
|
+
return chunks
|
|
848
|
+
|
|
849
|
+
def _chunk_semantic(self, text, max_chunk_size):
|
|
850
|
+
import re
|
|
851
|
+
splits = re.split(r'(\\n\\n|\\.(?=\\s+[A-Z])|[!?](?=\\s+))', text)
|
|
852
|
+
chunks = []
|
|
853
|
+
current_chunk = ""
|
|
854
|
+
for i in range(0, len(splits), 2):
|
|
855
|
+
segment = splits[i]
|
|
856
|
+
separator = splits[i + 1] if i + 1 < len(splits) else ""
|
|
857
|
+
if len(current_chunk) + len(segment) + len(separator) <= max_chunk_size:
|
|
858
|
+
current_chunk += segment + separator
|
|
859
|
+
else:
|
|
860
|
+
if current_chunk:
|
|
861
|
+
chunks.append(current_chunk.strip())
|
|
862
|
+
current_chunk = segment + separator
|
|
863
|
+
if current_chunk:
|
|
864
|
+
chunks.append(current_chunk.strip())
|
|
865
|
+
return chunks
|
|
866
|
+
|
|
867
|
+
|
|
868
|
+
class QAChainTool:
|
|
869
|
+
"""Question-Answering with optional context."""
|
|
870
|
+
|
|
871
|
+
def __init__(self, model: str = "gpt-3.5-turbo-1106"):
|
|
872
|
+
self.model = model
|
|
873
|
+
|
|
874
|
+
async def answer_question(
|
|
875
|
+
self,
|
|
876
|
+
question: str,
|
|
877
|
+
context: Optional[str] = None,
|
|
878
|
+
context_documents: Optional[List[str]] = None,
|
|
879
|
+
include_sources: bool = True,
|
|
880
|
+
include_confidence: bool = True,
|
|
881
|
+
chain_of_thought: bool = False,
|
|
882
|
+
) -> Dict[str, Any]:
|
|
883
|
+
try:
|
|
884
|
+
logger.info("answering_question", has_context=bool(context or context_documents))
|
|
885
|
+
|
|
886
|
+
full_context = ""
|
|
887
|
+
if context:
|
|
888
|
+
full_context = f"Context:\\n{context}\\n\\n"
|
|
889
|
+
elif context_documents:
|
|
890
|
+
full_context = "Context Documents:\\n"
|
|
891
|
+
for i, doc in enumerate(context_documents, 1):
|
|
892
|
+
full_context += f"\\n[Document {i}]\\n{doc}\\n"
|
|
893
|
+
full_context += "\\n"
|
|
894
|
+
|
|
895
|
+
system_prompt = "You are a helpful assistant that answers questions accurately."
|
|
896
|
+
if full_context:
|
|
897
|
+
system_prompt += " Base your answer on the provided context."
|
|
898
|
+
if chain_of_thought:
|
|
899
|
+
system_prompt += " Show your reasoning step by step."
|
|
900
|
+
system_prompt += " Respond in JSON with 'answer' field"
|
|
901
|
+
if include_confidence:
|
|
902
|
+
system_prompt += ", 'confidence' (0-1)"
|
|
903
|
+
system_prompt += "."
|
|
904
|
+
|
|
905
|
+
response = await acompletion(
|
|
906
|
+
model=self.model,
|
|
907
|
+
messages=[
|
|
908
|
+
{"role": "system", "content": system_prompt},
|
|
909
|
+
{"role": "user", "content": full_context + f"Question: {question}"},
|
|
910
|
+
],
|
|
911
|
+
temperature=0.3,
|
|
912
|
+
response_format={"type": "json_object"},
|
|
913
|
+
)
|
|
914
|
+
import json
|
|
915
|
+
result = json.loads(response.choices[0].message.content)
|
|
916
|
+
return {
|
|
917
|
+
"question": question,
|
|
918
|
+
"has_context": bool(context or context_documents),
|
|
919
|
+
"num_context_docs": len(context_documents) if context_documents else (1 if context else 0),
|
|
920
|
+
**result,
|
|
921
|
+
}
|
|
922
|
+
except Exception as e:
|
|
923
|
+
logger.error("qa_chain_failed", error=str(e))
|
|
924
|
+
raise
|
|
925
|
+
|
|
926
|
+
|
|
927
|
+
summarization_tool = SummarizationTool()
|
|
928
|
+
sentiment_tool = SentimentAnalysisTool()
|
|
929
|
+
information_extractor = InformationExtractorTool()
|
|
930
|
+
text_classifier = TextClassifierTool()
|
|
931
|
+
text_chunker = TextChunkingTool()
|
|
932
|
+
qa_chain = QAChainTool()
|
|
933
|
+
`;
|
|
934
|
+
}
|
|
935
|
+
// ============================================================================
|
|
936
|
+
// Vision Tools — Image analysis using GPT-4 Vision
|
|
937
|
+
// ============================================================================
|
|
938
|
+
/**
 * Build the Python source text for the vision / image-analysis tools module.
 *
 * The returned string is a complete Python module defining `VisionTool`
 * (image description, text extraction, and chart description via
 * `litellm.acompletion`) plus a module-level `vision_tool` instance.
 *
 * Differences from the previous template:
 *  - `describe_chart` now applies its 800-token budget via
 *    `kwargs.setdefault`, so a caller-supplied `max_tokens` no longer raises
 *    `TypeError: got multiple values for keyword argument 'max_tokens'`.
 *  - `_encode_image` opens the PIL image in a `with` block so the image
 *    handle is released after the format has been read.
 *
 * NOTE(review): the default model "gpt-4-vision-preview" may no longer be
 * served by the provider — confirm and consider updating the default.
 *
 * @returns {string} Python source code for the vision tools module.
 */
function generateVisionTools() {
    return `"""Vision and Image Analysis Tools. Auto-generated by ChimerAI CLI."""

from typing import Dict, Any, List, Optional
import base64
import io
import structlog
from PIL import Image
import litellm

logger = structlog.get_logger()


class VisionTool:
    """Image analysis tool using GPT-4 Vision."""

    def __init__(self, model: str = "gpt-4-vision-preview"):
        self.model = model

    async def analyze_image(
        self,
        image_path: Optional[str] = None,
        image_url: Optional[str] = None,
        image_bytes: Optional[bytes] = None,
        prompt: str = "Describe this image in detail.",
        max_tokens: int = 500,
    ) -> Dict[str, Any]:
        """Send one image plus a text prompt to the model and return its reply.

        The image source is chosen in this order: image_url, image_path,
        image_bytes. Raises ValueError when none of them is provided.
        """
        try:
            logger.info("analyzing_image", model=self.model)
            if image_url:
                image_content = {"type": "image_url", "image_url": {"url": image_url}}
            elif image_path:
                with open(image_path, 'rb') as f:
                    image_bytes = f.read()
                image_content = self._encode_image(image_bytes)
            elif image_bytes:
                image_content = self._encode_image(image_bytes)
            else:
                raise ValueError("No image provided")

            response = await litellm.acompletion(
                model=self.model,
                messages=[{"role": "user", "content": [{"type": "text", "text": prompt}, image_content]}],
                max_tokens=max_tokens,
            )
            return {
                "description": response.choices[0].message.content,
                "model": response.model,
                "tokens_used": response.usage.total_tokens,
            }
        except Exception as e:
            logger.error("image_analysis_error", error=str(e))
            raise

    def _encode_image(self, image_bytes: bytes) -> Dict[str, Any]:
        """Wrap raw image bytes as a base64 data-URL content part."""
        # Only the detected format is needed; close the image right after reading it.
        with Image.open(io.BytesIO(image_bytes)) as image:
            fmt = image.format.lower() if image.format else "png"
        base64_image = base64.b64encode(image_bytes).decode('utf-8')
        return {"type": "image_url", "image_url": {"url": f"data:image/{fmt};base64,{base64_image}"}}

    async def extract_text_from_image(self, **kwargs) -> str:
        """Return only the text the model reads from the image."""
        result = await self.analyze_image(prompt="Extract all text from this image. Return only the extracted text.", **kwargs)
        return result["description"]

    async def describe_chart(self, **kwargs) -> Dict[str, Any]:
        """Describe a chart or graph image (type, data points, trends, patterns)."""
        # Default to a larger budget, but let the caller override it instead of
        # failing with a duplicate-keyword TypeError.
        kwargs.setdefault("max_tokens", 800)
        return await self.analyze_image(
            prompt="Analyze this chart/graph. Provide: 1. Chart type 2. Key data points 3. Main trends 4. Notable patterns",
            **kwargs,
        )


vision_tool = VisionTool()
`;
}
|
|
1013
|
+
// ============================================================================
|
|
1014
|
+
// Google Sheets Tools
|
|
1015
|
+
// ============================================================================
|
|
1016
|
+
/**
 * Build the Python source text for the Google Sheets tools module.
 *
 * The returned string is a complete Python module defining
 * `GoogleSheetsTools` (read / write / append / update-cell / create-sheet /
 * clear operations on the Sheets v4 API) plus a module-level
 * `google_sheets_tools` instance. Every operation returns a dict and reports
 * failures through an "error" key instead of raising.
 *
 * Differences from the previous template:
 *  - `read_sheet` reports `column_count` as the widest returned row rather
 *    than the width of the first row, because returned rows can be ragged.
 *  - `create_sheet` no longer indexes into an empty `replies` list.
 *  - The HTTP status is read from `e.resp.status`, which `HttpError` always
 *    carries.
 *  - Failures are logged through the module logger as well as returned.
 *
 * NOTE(review): the generated `async def` methods call the blocking
 * `.execute()` directly — confirm whether callers expect them to yield.
 *
 * @returns {string} Python source code for the Google Sheets tools module.
 */
function generateGoogleSheetsTools() {
    return `"""Google Sheets Integration Tools. Auto-generated by ChimerAI CLI."""

import structlog
from typing import List, Dict, Any, Optional
from google.oauth2 import service_account
from googleapiclient.discovery import build
from googleapiclient.errors import HttpError
import os

logger = structlog.get_logger()


class GoogleSheetsTools:
    """Tools for interacting with Google Sheets API."""

    def __init__(self):
        self.service = None
        self._initialize_service()

    def _initialize_service(self):
        """Build the Sheets client from the service-account file named by GOOGLE_SERVICE_ACCOUNT_JSON."""
        try:
            creds_path = os.getenv("GOOGLE_SERVICE_ACCOUNT_JSON")
            if creds_path and os.path.exists(creds_path):
                creds = service_account.Credentials.from_service_account_file(
                    creds_path, scopes=['https://www.googleapis.com/auth/spreadsheets']
                )
                self.service = build('sheets', 'v4', credentials=creds)
                logger.info("google_sheets_initialized")
            else:
                logger.warning("google_sheets_no_credentials")
        except Exception as e:
            logger.error("google_sheets_init_failed", error=str(e))
            self.service = None

    async def read_sheet(self, spreadsheet_id: str, range_name: str, value_render_option: str = "FORMATTED_VALUE") -> Dict[str, Any]:
        """Read a range and return its values with row and column counts."""
        try:
            if not self.service:
                return {"error": "Google Sheets not initialized", "values": []}
            result = self.service.spreadsheets().values().get(
                spreadsheetId=spreadsheet_id, range=range_name, valueRenderOption=value_render_option
            ).execute()
            values = result.get('values', [])
            # Rows can have different lengths, so use the widest row.
            column_count = max((len(row) for row in values), default=0)
            return {"values": values, "range": result.get('range'), "row_count": len(values), "column_count": column_count}
        except HttpError as e:
            logger.error("google_sheets_read_failed", error=str(e))
            return {"error": f"HTTP Error: {e.resp.status}", "values": []}
        except Exception as e:
            logger.error("google_sheets_read_failed", error=str(e))
            return {"error": str(e), "values": []}

    async def write_sheet(self, spreadsheet_id: str, range_name: str, values: List[List[Any]], value_input_option: str = "USER_ENTERED") -> Dict[str, Any]:
        """Overwrite a range with the given rows."""
        try:
            if not self.service:
                return {"error": "Google Sheets not initialized"}
            result = self.service.spreadsheets().values().update(
                spreadsheetId=spreadsheet_id, range=range_name, valueInputOption=value_input_option, body={'values': values}
            ).execute()
            return {"updatedRange": result.get('updatedRange'), "updatedCells": result.get('updatedCells')}
        except Exception as e:
            logger.error("google_sheets_write_failed", error=str(e))
            return {"error": str(e)}

    async def append_rows(self, spreadsheet_id: str, range_name: str, values: List[List[Any]], value_input_option: str = "USER_ENTERED") -> Dict[str, Any]:
        """Append rows after the data found in the given range."""
        try:
            if not self.service:
                return {"error": "Google Sheets not initialized"}
            result = self.service.spreadsheets().values().append(
                spreadsheetId=spreadsheet_id, range=range_name, valueInputOption=value_input_option, insertDataOption='INSERT_ROWS', body={'values': values}
            ).execute()
            updates = result.get('updates', {})
            return {"updatedRange": updates.get('updatedRange'), "updatedCells": updates.get('updatedCells')}
        except Exception as e:
            logger.error("google_sheets_append_failed", error=str(e))
            return {"error": str(e)}

    async def update_cell(self, spreadsheet_id: str, cell: str, value: Any, value_input_option: str = "USER_ENTERED") -> Dict[str, Any]:
        """Write a single value into one cell."""
        try:
            if not self.service:
                return {"error": "Google Sheets not initialized"}
            result = self.service.spreadsheets().values().update(
                spreadsheetId=spreadsheet_id, range=cell, valueInputOption=value_input_option, body={'values': [[value]]}
            ).execute()
            return {"updatedRange": result.get('updatedRange'), "updatedCells": result.get('updatedCells')}
        except Exception as e:
            logger.error("google_sheets_update_cell_failed", error=str(e))
            return {"error": str(e)}

    async def create_sheet(self, spreadsheet_id: str, sheet_title: str) -> Dict[str, Any]:
        """Add a new sheet (tab) with the given title to the spreadsheet."""
        try:
            if not self.service:
                return {"error": "Google Sheets not initialized"}
            body = {'requests': [{'addSheet': {'properties': {'title': sheet_title}}}]}
            result = self.service.spreadsheets().batchUpdate(spreadsheetId=spreadsheet_id, body=body).execute()
            # Fall back to one empty reply when 'replies' is missing or an empty list.
            replies = result.get('replies') or [{}]
            sheet_id = replies[0].get('addSheet', {}).get('properties', {}).get('sheetId')
            return {"sheetId": sheet_id, "title": sheet_title}
        except Exception as e:
            logger.error("google_sheets_create_sheet_failed", error=str(e))
            return {"error": str(e)}

    async def clear_sheet(self, spreadsheet_id: str, range_name: str) -> Dict[str, Any]:
        """Clear the values in a range."""
        try:
            if not self.service:
                return {"error": "Google Sheets not initialized"}
            result = self.service.spreadsheets().values().clear(spreadsheetId=spreadsheet_id, range=range_name, body={}).execute()
            return {"clearedRange": result.get('clearedRange')}
        except Exception as e:
            logger.error("google_sheets_clear_failed", error=str(e))
            return {"error": str(e)}


google_sheets_tools = GoogleSheetsTools()
`;
}
|
|
1122
|
+
// ============================================================================
|
|
1123
|
+
// Airtable Tools
|
|
1124
|
+
// ============================================================================
|
|
1125
|
+
/**
 * Build the Python source text for the Airtable tools module.
 *
 * The returned string is a complete Python module defining `AirtableTools`
 * (list / get / create / batch-create / update / delete / search records via
 * `pyairtable`) plus a module-level `airtable_tools` instance. Every
 * operation returns a dict and reports failures through an "error" key
 * instead of raising.
 *
 * Differences from the previous template:
 *  - `update_record` forwards `replace` to a single `table.update` call
 *    instead of duplicating the call in a conditional expression.
 *  - `search_records` includes `created_time` in each record, matching
 *    `get_records` and `get_record`.
 *  - Failures are logged through the module logger as well as returned.
 *
 * @returns {string} Python source code for the Airtable tools module.
 */
function generateAirtableTools() {
    return `"""Airtable Integration Tools. Auto-generated by ChimerAI CLI."""

import structlog
from typing import List, Dict, Any, Optional
from pyairtable import Api
from pyairtable.formulas import match
import os

logger = structlog.get_logger()


class AirtableTools:
    """Tools for interacting with Airtable API."""

    def __init__(self):
        self.api = None
        api_key = os.getenv("AIRTABLE_API_KEY") or os.getenv("AIRTABLE_ACCESS_TOKEN")
        if api_key:
            self.api = Api(api_key)
            logger.info("airtable_initialized")
        else:
            logger.warning("airtable_no_credentials")

    async def get_records(self, base_id, table_name, max_records=None, view=None, formula=None, sort=None):
        """List records from a table, optionally limited, filtered, or sorted."""
        try:
            if not self.api:
                return {"error": "Airtable not initialized", "records": []}
            table = self.api.table(base_id, table_name)
            kwargs = {}
            if max_records:
                kwargs['max_records'] = max_records
            if view:
                kwargs['view'] = view
            if formula:
                kwargs['formula'] = formula
            if sort:
                kwargs['sort'] = sort
            records = table.all(**kwargs)
            return {"records": [{"id": r["id"], "fields": r["fields"], "created_time": r.get("createdTime")} for r in records], "count": len(records)}
        except Exception as e:
            logger.error("airtable_get_records_failed", error=str(e))
            return {"error": str(e), "records": []}

    async def get_record(self, base_id, table_name, record_id):
        """Fetch a single record by id."""
        try:
            if not self.api:
                return {"error": "Airtable not initialized"}
            record = self.api.table(base_id, table_name).get(record_id)
            return {"id": record["id"], "fields": record["fields"], "created_time": record.get("createdTime")}
        except Exception as e:
            logger.error("airtable_get_record_failed", error=str(e))
            return {"error": str(e)}

    async def create_record(self, base_id, table_name, fields):
        """Create one record from a dict of fields."""
        try:
            if not self.api:
                return {"error": "Airtable not initialized"}
            record = self.api.table(base_id, table_name).create(fields)
            return {"id": record["id"], "fields": record["fields"]}
        except Exception as e:
            logger.error("airtable_create_record_failed", error=str(e))
            return {"error": str(e)}

    async def create_records(self, base_id, table_name, records):
        """Create several records in one batch call."""
        try:
            if not self.api:
                return {"error": "Airtable not initialized"}
            created = self.api.table(base_id, table_name).batch_create(records)
            return {"records": [{"id": r["id"], "fields": r["fields"]} for r in created], "count": len(created)}
        except Exception as e:
            logger.error("airtable_create_records_failed", error=str(e))
            return {"error": str(e)}

    async def update_record(self, base_id, table_name, record_id, fields, replace=False):
        """Update a record; with replace=True the update is sent with replace enabled."""
        try:
            if not self.api:
                return {"error": "Airtable not initialized"}
            table = self.api.table(base_id, table_name)
            record = table.update(record_id, fields, replace=replace)
            return {"id": record["id"], "fields": record["fields"]}
        except Exception as e:
            logger.error("airtable_update_record_failed", error=str(e))
            return {"error": str(e)}

    async def delete_record(self, base_id, table_name, record_id):
        """Delete a record by id."""
        try:
            if not self.api:
                return {"error": "Airtable not initialized"}
            result = self.api.table(base_id, table_name).delete(record_id)
            return {"id": result["id"], "deleted": result.get("deleted", True)}
        except Exception as e:
            logger.error("airtable_delete_record_failed", error=str(e))
            return {"error": str(e)}

    async def search_records(self, base_id, table_name, field_name, field_value, max_records=None):
        """Return records whose field_name equals field_value."""
        try:
            if not self.api:
                return {"error": "Airtable not initialized", "records": []}
            table = self.api.table(base_id, table_name)
            formula = match({field_name: field_value})
            kwargs = {"formula": formula}
            if max_records:
                kwargs["max_records"] = max_records
            records = table.all(**kwargs)
            # Same record shape as get_records / get_record.
            return {"records": [{"id": r["id"], "fields": r["fields"], "created_time": r.get("createdTime")} for r in records], "count": len(records)}
        except Exception as e:
            logger.error("airtable_search_records_failed", error=str(e))
            return {"error": str(e), "records": []}


airtable_tools = AirtableTools()
`;
}
|
|
1221
|
+
// ============================================================================
|
|
1222
|
+
// DeepL Translation Tools
|
|
1223
|
+
// ============================================================================
|
|
1224
|
+
function generateDeeplTools() {
|
|
1225
|
+
return `"""DeepL Translation Tools. Auto-generated by ChimerAI CLI."""
|
|
1226
|
+
|
|
1227
|
+
import structlog
|
|
1228
|
+
from typing import List, Dict, Any, Optional
|
|
1229
|
+
import deepl
|
|
1230
|
+
import os
|
|
1231
|
+
|
|
1232
|
+
logger = structlog.get_logger()
|
|
1233
|
+
|
|
1234
|
+
|
|
1235
|
+
class DeepLTools:
|
|
1236
|
+
"""Tools for text translation using DeepL API."""
|
|
1237
|
+
|
|
1238
|
+
def __init__(self):
|
|
1239
|
+
self.translator = None
|
|
1240
|
+
auth_key = os.getenv("DEEPL_AUTH_KEY") or os.getenv("DEEPL_API_KEY")
|
|
1241
|
+
if auth_key:
|
|
1242
|
+
self.translator = deepl.Translator(auth_key)
|
|
1243
|
+
logger.info("deepl_initialized")
|
|
1244
|
+
else:
|
|
1245
|
+
logger.warning("deepl_no_credentials")
|
|
1246
|
+
|
|
1247
|
+
async def translate_text(self, text, target_lang, source_lang=None, formality=None, split_sentences=None, preserve_formatting=True, tag_handling=None):
|
|
1248
|
+
try:
|
|
1249
|
+
if not self.translator:
|
|
1250
|
+
return {"error": "DeepL not initialized", "translations": []}
|
|
1251
|
+
is_batch = isinstance(text, list)
|
|
1252
|
+
kwargs = {"target_lang": target_lang.upper()}
|
|
1253
|
+
if source_lang: kwargs["source_lang"] = source_lang.upper()
|
|
1254
|
+
if formality: kwargs["formality"] = formality
|
|
1255
|
+
if split_sentences: kwargs["split_sentences"] = split_sentences
|
|
1256
|
+
if not preserve_formatting: kwargs["preserve_formatting"] = False
|
|
1257
|
+
if tag_handling: kwargs["tag_handling"] = tag_handling
|
|
1258
|
+
|
|
1259
|
+
if is_batch:
|
|
1260
|
+
results = self.translator.translate_text(text, **kwargs)
|
|
1261
|
+
translations = [{"text": r.text, "detected_source_lang": r.detected_source_lang} for r in results]
|
|
1262
|
+
else:
|
|
1263
|
+
result = self.translator.translate_text(text, **kwargs)
|
|
1264
|
+
translations = [{"text": result.text, "detected_source_lang": result.detected_source_lang}]
|
|
1265
|
+
return {"translations": translations, "target_lang": target_lang.upper(), "source_lang": source_lang.upper() if source_lang else translations[0]["detected_source_lang"]}
|
|
1266
|
+
except Exception as e:
|
|
1267
|
+
return {"error": str(e), "translations": []}
|
|
1268
|
+
|
|
1269
|
+
async def detect_language(self, text):
|
|
1270
|
+
try:
|
|
1271
|
+
if not self.translator: return {"error": "DeepL not initialized"}
|
|
1272
|
+
result = self.translator.translate_text(text, target_lang="EN-US")
|
|
1273
|
+
return {"language": result.detected_source_lang, "text_sample": text[:100]}
|
|
1274
|
+
except Exception as e:
|
|
1275
|
+
return {"error": str(e)}
|
|
1276
|
+
|
|
1277
|
+
async def get_usage(self):
|
|
1278
|
+
try:
|
|
1279
|
+
if not self.translator: return {"error": "DeepL not initialized"}
|
|
1280
|
+
usage = self.translator.get_usage()
|
|
1281
|
+
percentage = 100.0 if usage.character.limit_reached else (usage.character.count / usage.character.limit * 100 if usage.character.limit else None)
|
|
1282
|
+
return {"character_count": usage.character.count, "character_limit": usage.character.limit, "limit_reached": usage.character.limit_reached, "percentage_used": round(percentage, 2) if percentage else None}
|
|
1283
|
+
except Exception as e:
|
|
1284
|
+
return {"error": str(e)}
|
|
1285
|
+
|
|
1286
|
+
async def get_supported_languages(self, language_type="target"):
|
|
1287
|
+
try:
|
|
1288
|
+
if not self.translator: return {"error": "DeepL not initialized", "languages": []}
|
|
1289
|
+
languages = self.translator.get_source_languages() if language_type == "source" else self.translator.get_target_languages()
|
|
1290
|
+
return {"languages": [{"code": l.code, "name": l.name, "supports_formality": getattr(l, 'supports_formality', None)} for l in languages], "type": language_type, "count": len(languages)}
|
|
1291
|
+
except Exception as e:
|
|
1292
|
+
return {"error": str(e), "languages": []}
|
|
1293
|
+
|
|
1294
|
+
async def translate_document(self, input_path, output_path, target_lang, source_lang=None, formality=None):
|
|
1295
|
+
try:
|
|
1296
|
+
if not self.translator: return {"error": "DeepL not initialized"}
|
|
1297
|
+
kwargs = {"target_lang": target_lang.upper()}
|
|
1298
|
+
if source_lang: kwargs["source_lang"] = source_lang.upper()
|
|
1299
|
+
if formality: kwargs["formality"] = formality
|
|
1300
|
+
self.translator.translate_document_from_filepath(input_path, output_path, **kwargs)
|
|
1301
|
+
return {"success": True, "input_path": input_path, "output_path": output_path, "target_lang": target_lang.upper()}
|
|
1302
|
+
except FileNotFoundError:
|
|
1303
|
+
return {"error": f"File not found: {input_path}"}
|
|
1304
|
+
except Exception as e:
|
|
1305
|
+
return {"error": str(e)}
|
|
1306
|
+
|
|
1307
|
+
|
|
1308
|
+
deepl_tools = DeepLTools()
|
|
1309
|
+
`;
|
|
1310
|
+
}
|
|
1311
|
+
// ============================================================================
|
|
1312
|
+
// Webhook Tools — n8n, Zapier, Make, Slack
|
|
1313
|
+
// ============================================================================
|
|
1314
|
+
/**
 * Generate the Python source of the webhook tools module.
 *
 * The emitted module defines a `WebhookTools` class (a thin wrapper around a
 * shared `httpx.AsyncClient` with a 30 second timeout) plus a module-level
 * `webhook_tools` singleton. `call_webhook` never raises: every outcome is
 * reported as a dict, with an `"error"` key on failure.
 *
 * Notes on the emitted `call_webhook`:
 * - The HTTP verb is validated against an allow-list and then sent through
 *   `client.request()`. The per-verb shortcuts `client.get()` and
 *   `client.delete()` do not accept a `json=` argument, so a payload combined
 *   with GET or DELETE would otherwise fail with a TypeError.
 * - A payload is attached whenever it is not `None`, so an explicit empty
 *   object is still sent as a JSON body.
 * - A non-JSON response body is returned as `{"text": ...}`.
 *
 * @returns {string} Complete Python module source, terminated by a newline.
 */
function generateWebhookTools() {
    return `"""Webhook Tools — Universal Integration. Auto-generated by ChimerAI CLI."""

import structlog
from typing import Dict, Any, Optional
import httpx
import os

logger = structlog.get_logger()


class WebhookTools:
    """Universal webhook caller — n8n, Zapier, Make, Slack, custom endpoints."""

    def __init__(self):
        self.client = httpx.AsyncClient(timeout=30.0)
        logger.info("webhook_tools_initialized")

    async def call_webhook(self, url, payload=None, method="POST", headers=None, query_params=None, auth_token=None):
        try:
            request_headers = {"Content-Type": "application/json", "User-Agent": "ChimerAI-Agent/1.0"}
            if headers: request_headers.update(headers)
            if auth_token: request_headers["Authorization"] = f"Bearer {auth_token}"

            request_kwargs = {"url": url, "headers": request_headers}
            if payload is not None: request_kwargs["json"] = payload
            if query_params: request_kwargs["params"] = query_params

            method = method.upper()
            if method not in ("POST", "GET", "PUT", "DELETE", "PATCH"):
                return {"error": f"Unsupported method: {method}"}
            # request() accepts json= for every verb; the get()/delete() shortcuts do not.
            response = await self.client.request(method, **request_kwargs)

            try: response_data = response.json()
            except Exception: response_data = {"text": response.text}

            return {"success": response.is_success, "status_code": response.status_code, "response": response_data, "url": url, "method": method}
        except httpx.TimeoutException:
            return {"error": "Timeout after 30s", "url": url}
        except Exception as e:
            return {"error": str(e), "url": url}

    async def call_n8n_webhook(self, webhook_id, payload, n8n_url=None):
        if not n8n_url: n8n_url = os.getenv("N8N_WEBHOOK_URL", "https://n8n.example.com")
        url = f"{n8n_url.rstrip('/')}/webhook/{webhook_id}"
        return await self.call_webhook(url=url, payload=payload)

    async def call_zapier_webhook(self, hook_id, payload):
        url = f"https://hooks.zapier.com/hooks/catch/{hook_id}"
        return await self.call_webhook(url=url, payload=payload)

    async def call_make_webhook(self, webhook_path, payload, region="eu1"):
        url = f"https://hook.{region}.make.com/{webhook_path}"
        return await self.call_webhook(url=url, payload=payload)

    async def notify_slack_via_webhook(self, webhook_url, message, channel=None, username=None, icon_emoji=None):
        payload = {"text": message}
        if channel: payload["channel"] = channel
        if username: payload["username"] = username
        if icon_emoji: payload["icon_emoji"] = icon_emoji
        return await self.call_webhook(url=webhook_url, payload=payload)


webhook_tools = WebhookTools()
`;
}
|
|
1383
|
+
// ============================================================================
|
|
1384
|
+
// Tools __init__.py — dynamic based on installed tools
|
|
1385
|
+
// ============================================================================
|
|
1386
|
+
/**
 * Generate the Python source of the tools package `__init__.py`.
 *
 * Only the tool modules named in `tools` get an import line. Import lines are
 * emitted in the fixed order of the table below, regardless of the order of
 * `tools`; unrecognised keys are ignored.
 *
 * @param {string[]} tools - Keys of the installed tool modules (e.g. `'web_tools'`).
 * @returns {string} Module docstring, a blank line, the import lines, and a trailing newline.
 */
function generateToolsInit(tools) {
    // Ordered [tool key, import statement] pairs; table order is output order.
    const importLineByTool = [
        ['web_tools', 'from .web_tools import WebScraperTool, WebSearchTool'],
        ['document_tools', 'from .document_tools import PDFTool, DocumentTool'],
        ['code_tools', 'from .code_tools import CodeInterpreterTool'],
        ['nlp_tools', 'from .nlp_tools import SummarizationTool, SentimentAnalysisTool'],
        ['vision_tools', 'from .vision_tools import VisionTool'],
        ['google_sheets_tools', 'from .google_sheets_tools import GoogleSheetsTools'],
        ['airtable_tools', 'from .airtable_tools import AirtableTools'],
        ['deepl_tools', 'from .deepl_tools import DeepLTools'],
        ['webhook_tools', 'from .webhook_tools import WebhookTools'],
    ];
    const selectedImports = importLineByTool
        .filter(([toolKey]) => tools.includes(toolKey))
        .map(([, importLine]) => importLine);
    const moduleDocstring = '"""AI Tools — Auto-generated by ChimerAI CLI."""';
    // Layout: docstring, blank line, imports, trailing newline.
    return [moduleDocstring, '', selectedImports.join('\n'), ''].join('\n');
}
|
|
1411
|
+
/**
 * Registry mapping each tool key to the function that generates that tool
 * module's Python source.
 *
 * The keys are the same identifiers that `generateToolsInit` checks for when
 * it decides which import lines to emit. Each value is a generator function
 * defined earlier in this file.
 */
const toolGeneratorsByKey = {
    web_tools: generateWebTools,
    document_tools: generateDocumentTools,
    code_tools: generateCodeTools,
    nlp_tools: generateNlpTools,
    vision_tools: generateVisionTools,
    google_sheets_tools: generateGoogleSheetsTools,
    airtable_tools: generateAirtableTools,
    deepl_tools: generateDeeplTools,
    webhook_tools: generateWebhookTools,
};
exports.TOOL_GENERATORS = toolGeneratorsByKey;
|