@push.rocks/smartai 0.6.1 → 0.7.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -9,7 +9,13 @@ export type TChatCompletionRequestMessage = {
  };

  import { MultiModalModel } from './abstract.classes.multimodal.js';
- import type { ResearchOptions, ResearchResponse } from './abstract.classes.multimodal.js';
+ import type {
+   ResearchOptions,
+   ResearchResponse,
+   ImageGenerateOptions,
+   ImageEditOptions,
+   ImageResponse
+ } from './abstract.classes.multimodal.js';

  export interface IOpenaiProviderOptions {
    openaiToken: string;
@@ -17,6 +23,7 @@ export interface IOpenaiProviderOptions {
    audioModel?: string;
    visionModel?: string;
    researchModel?: string;
+   imageModel?: string;
    enableWebSearch?: boolean;
  }

@@ -233,52 +240,37 @@ export class OpenAiProvider extends MultiModalModel {
    }

    public async research(optionsArg: ResearchOptions): Promise<ResearchResponse> {
-     // Determine which model to use based on search depth
+     // Determine which model to use - Deep Research API requires specific models
      let model: string;
      if (optionsArg.searchDepth === 'deep') {
        model = this.options.researchModel || 'o4-mini-deep-research-2025-06-26';
      } else {
-       model = this.options.chatModel || 'gpt-5-mini';
+       // For basic/advanced, still use deep research models if web search is needed
+       if (optionsArg.includeWebSearch) {
+         model = this.options.researchModel || 'o4-mini-deep-research-2025-06-26';
+       } else {
+         model = this.options.chatModel || 'gpt-5-mini';
+       }
      }

-     // Prepare the request parameters
+     const systemMessage = 'You are a research assistant. Provide comprehensive answers with citations and sources when available.';
+
+     // Prepare request parameters using Deep Research API format
      const requestParams: any = {
        model,
-       messages: [
-         {
-           role: 'system',
-           content: 'You are a research assistant. Provide comprehensive answers with citations and sources when available.'
-         },
-         {
-           role: 'user',
-           content: optionsArg.query
-         }
-       ],
-       temperature: 0.7
+       instructions: systemMessage,
+       input: optionsArg.query
      };

-     // Add web search tools if requested
+     // Add web search tool if requested
      if (optionsArg.includeWebSearch || optionsArg.searchDepth === 'deep') {
        requestParams.tools = [
          {
-           type: 'function',
-           function: {
-             name: 'web_search',
-             description: 'Search the web for information',
-             parameters: {
-               type: 'object',
-               properties: {
-                 query: {
-                   type: 'string',
-                   description: 'The search query'
-                 }
-               },
-               required: ['query']
-             }
-           }
+           type: 'web_search_preview',
+           search_context_size: optionsArg.searchDepth === 'deep' ? 'high' :
+                                optionsArg.searchDepth === 'advanced' ? 'medium' : 'low'
          }
        ];
-       requestParams.tool_choice = 'auto';
      }

      // Add background flag for deep research
@@ -287,14 +279,36 @@ export class OpenAiProvider extends MultiModalModel {
      }

      try {
-       // Execute the research request
-       const result = await this.openAiApiClient.chat.completions.create(requestParams);
-
-       // Extract the answer
-       const answer = result.choices[0].message.content || '';
+       // Execute the research request using Deep Research API
+       const result = await this.openAiApiClient.responses.create(requestParams);

-       // Parse sources from the response (OpenAI often includes URLs in markdown format)
+       // Extract the answer from output items
+       let answer = '';
        const sources: Array<{ url: string; title: string; snippet: string }> = [];
+       const searchQueries: string[] = [];
+
+       // Process output items
+       for (const item of result.output || []) {
+         // Extract message content
+         if (item.type === 'message' && 'content' in item) {
+           const messageItem = item as any;
+           for (const contentItem of messageItem.content || []) {
+             if (contentItem.type === 'output_text' && 'text' in contentItem) {
+               answer += contentItem.text;
+             }
+           }
+         }
+
+         // Extract web search queries
+         if (item.type === 'web_search_call' && 'action' in item) {
+           const searchItem = item as any;
+           if (searchItem.action && searchItem.action.type === 'search' && 'query' in searchItem.action) {
+             searchQueries.push(searchItem.action.query);
+           }
+         }
+       }
+
+       // Parse sources from markdown links in the answer
        const urlRegex = /\[([^\]]+)\]\(([^)]+)\)/g;
        let match: RegExpExecArray | null;

@@ -302,27 +316,10 @@ export class OpenAiProvider extends MultiModalModel {
          sources.push({
            title: match[1],
            url: match[2],
-           snippet: '' // OpenAI doesn't provide snippets in standard responses
+           snippet: ''
          });
        }

-       // Extract search queries if tools were used
-       const searchQueries: string[] = [];
-       if (result.choices[0].message.tool_calls) {
-         for (const toolCall of result.choices[0].message.tool_calls) {
-           if ('function' in toolCall && toolCall.function.name === 'web_search') {
-             try {
-               const args = JSON.parse(toolCall.function.arguments);
-               if (args.query) {
-                 searchQueries.push(args.query);
-               }
-             } catch (e) {
-               // Ignore parsing errors
-             }
-           }
-         }
-       }
-
        return {
          answer,
          sources,
@@ -338,4 +335,121 @@ export class OpenAiProvider extends MultiModalModel {
        throw new Error(`Failed to perform research: ${error.message}`);
      }
    }
+
+   /**
+    * Image generation using OpenAI's gpt-image-1 or DALL-E models
+    */
+   public async imageGenerate(optionsArg: ImageGenerateOptions): Promise<ImageResponse> {
+     const model = optionsArg.model || this.options.imageModel || 'gpt-image-1';
+
+     try {
+       const requestParams: any = {
+         model,
+         prompt: optionsArg.prompt,
+         n: optionsArg.n || 1,
+       };
+
+       // Add gpt-image-1 specific parameters
+       if (model === 'gpt-image-1') {
+         if (optionsArg.quality) requestParams.quality = optionsArg.quality;
+         if (optionsArg.size) requestParams.size = optionsArg.size;
+         if (optionsArg.background) requestParams.background = optionsArg.background;
+         if (optionsArg.outputFormat) requestParams.output_format = optionsArg.outputFormat;
+         if (optionsArg.outputCompression !== undefined) requestParams.output_compression = optionsArg.outputCompression;
+         if (optionsArg.moderation) requestParams.moderation = optionsArg.moderation;
+         if (optionsArg.stream !== undefined) requestParams.stream = optionsArg.stream;
+         if (optionsArg.partialImages !== undefined) requestParams.partial_images = optionsArg.partialImages;
+       } else if (model === 'dall-e-3') {
+         // DALL-E 3 specific parameters
+         if (optionsArg.quality) requestParams.quality = optionsArg.quality;
+         if (optionsArg.size) requestParams.size = optionsArg.size;
+         if (optionsArg.style) requestParams.style = optionsArg.style;
+         requestParams.response_format = 'b64_json'; // Always use base64 for consistency
+       } else if (model === 'dall-e-2') {
+         // DALL-E 2 specific parameters
+         if (optionsArg.size) requestParams.size = optionsArg.size;
+         requestParams.response_format = 'b64_json';
+       }
+
+       const result = await this.openAiApiClient.images.generate(requestParams);
+
+       const images = (result.data || []).map(img => ({
+         b64_json: img.b64_json,
+         url: img.url,
+         revisedPrompt: img.revised_prompt
+       }));
+
+       return {
+         images,
+         metadata: {
+           model,
+           quality: result.quality,
+           size: result.size,
+           outputFormat: result.output_format,
+           tokensUsed: result.usage?.total_tokens
+         }
+       };
+     } catch (error) {
+       console.error('Image generation error:', error);
+       throw new Error(`Failed to generate image: ${error.message}`);
+     }
+   }
+
+   /**
+    * Image editing using OpenAI's gpt-image-1 or DALL-E 2 models
+    */
+   public async imageEdit(optionsArg: ImageEditOptions): Promise<ImageResponse> {
+     const model = optionsArg.model || this.options.imageModel || 'gpt-image-1';
+
+     try {
+       const requestParams: any = {
+         model,
+         image: optionsArg.image,
+         prompt: optionsArg.prompt,
+         n: optionsArg.n || 1,
+       };
+
+       // Add mask if provided
+       if (optionsArg.mask) {
+         requestParams.mask = optionsArg.mask;
+       }
+
+       // Add gpt-image-1 specific parameters
+       if (model === 'gpt-image-1') {
+         if (optionsArg.quality) requestParams.quality = optionsArg.quality;
+         if (optionsArg.size) requestParams.size = optionsArg.size;
+         if (optionsArg.background) requestParams.background = optionsArg.background;
+         if (optionsArg.outputFormat) requestParams.output_format = optionsArg.outputFormat;
+         if (optionsArg.outputCompression !== undefined) requestParams.output_compression = optionsArg.outputCompression;
+         if (optionsArg.stream !== undefined) requestParams.stream = optionsArg.stream;
+         if (optionsArg.partialImages !== undefined) requestParams.partial_images = optionsArg.partialImages;
+       } else if (model === 'dall-e-2') {
+         // DALL-E 2 specific parameters
+         if (optionsArg.size) requestParams.size = optionsArg.size;
+         requestParams.response_format = 'b64_json';
+       }
+
+       const result = await this.openAiApiClient.images.edit(requestParams);
+
+       const images = (result.data || []).map(img => ({
+         b64_json: img.b64_json,
+         url: img.url,
+         revisedPrompt: img.revised_prompt
+       }));
+
+       return {
+         images,
+         metadata: {
+           model,
+           quality: result.quality,
+           size: result.size,
+           outputFormat: result.output_format,
+           tokensUsed: result.usage?.total_tokens
+         }
+       };
+     } catch (error) {
+       console.error('Image edit error:', error);
+       throw new Error(`Failed to edit image: ${error.message}`);
+     }
+   }
  }
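
For orientation, here is a hedged consumer-side sketch of the surface added above. The option and field names mirror what `research()` and `imageGenerate()` handle in this diff; the exact `ResearchOptions`/`ImageGenerateOptions` typings live in `abstract.classes.multimodal.ts`, which is not shown here, so the literal values below are illustrative assumptions rather than documented defaults.

```typescript
import { OpenAiProvider } from '@push.rocks/smartai';

const openai = new OpenAiProvider({
  openaiToken: 'your-api-key',
  imageModel: 'gpt-image-1', // new option in 0.7.0
});
await openai.start();

// research(): with includeWebSearch set, even a 'basic' search now routes to the
// deep-research model and the web_search_preview tool (see the hunks above).
const research = await openai.research({
  query: 'What are the latest developments in quantum computing?',
  searchDepth: 'basic',
  includeWebSearch: true,
});
console.log(research.answer, research.sources.length, research.searchQueries);

// imageGenerate(): quality/size/outputFormat are only forwarded for gpt-image-1;
// DALL-E models are forced to base64 responses.
const generated = await openai.imageGenerate({
  prompt: 'A watercolor lighthouse at dusk',
  size: '1024x1024',   // assumed valid size value
  quality: 'high',     // assumed valid gpt-image-1 quality value
  outputFormat: 'png',
});
for (const img of generated.images) {
  console.log(img.revisedPrompt ?? '', (img.b64_json ?? img.url ?? '').slice(0, 32));
}
```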
@@ -1,7 +1,16 @@
  import * as plugins from './plugins.js';
  import * as paths from './paths.js';
  import { MultiModalModel } from './abstract.classes.multimodal.js';
- import type { ChatOptions, ChatResponse, ChatMessage, ResearchOptions, ResearchResponse } from './abstract.classes.multimodal.js';
+ import type {
+   ChatOptions,
+   ChatResponse,
+   ChatMessage,
+   ResearchOptions,
+   ResearchResponse,
+   ImageGenerateOptions,
+   ImageEditOptions,
+   ImageResponse
+ } from './abstract.classes.multimodal.js';

  export interface IPerplexityProviderOptions {
    perplexityToken: string;
@@ -233,4 +242,18 @@ export class PerplexityProvider extends MultiModalModel {
        throw new Error(`Failed to perform research: ${error.message}`);
      }
    }
+
+   /**
+    * Image generation is not supported by Perplexity
+    */
+   public async imageGenerate(optionsArg: ImageGenerateOptions): Promise<ImageResponse> {
+     throw new Error('Image generation is not supported by Perplexity. Please use OpenAI provider for image generation.');
+   }
+
+   /**
+    * Image editing is not supported by Perplexity
+    */
+   public async imageEdit(optionsArg: ImageEditOptions): Promise<ImageResponse> {
+     throw new Error('Image editing is not supported by Perplexity. Please use OpenAI provider for image editing.');
+   }
  }
@@ -1,7 +1,16 @@
  import * as plugins from './plugins.js';
  import * as paths from './paths.js';
  import { MultiModalModel } from './abstract.classes.multimodal.js';
- import type { ChatOptions, ChatResponse, ChatMessage, ResearchOptions, ResearchResponse } from './abstract.classes.multimodal.js';
+ import type {
+   ChatOptions,
+   ChatResponse,
+   ChatMessage,
+   ResearchOptions,
+   ResearchResponse,
+   ImageGenerateOptions,
+   ImageEditOptions,
+   ImageResponse
+ } from './abstract.classes.multimodal.js';
  import type { ChatCompletionMessageParam } from 'openai/resources/chat/completions';

  export interface IXAIProviderOptions {
@@ -185,4 +194,18 @@ export class XAIProvider extends MultiModalModel {
    public async research(optionsArg: ResearchOptions): Promise<ResearchResponse> {
      throw new Error('Research capabilities are not yet supported by xAI provider.');
    }
+
+   /**
+    * Image generation is not supported by xAI
+    */
+   public async imageGenerate(optionsArg: ImageGenerateOptions): Promise<ImageResponse> {
+     throw new Error('Image generation is not supported by xAI. Please use OpenAI provider for image generation.');
+   }
+
+   /**
+    * Image editing is not supported by xAI
+    */
+   public async imageEdit(optionsArg: ImageEditOptions): Promise<ImageResponse> {
+     throw new Error('Image editing is not supported by xAI. Please use OpenAI provider for image editing.');
+   }
  }
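
The Perplexity and xAI providers satisfy the new image methods by throwing, so image support is effectively a per-provider capability. A minimal, hypothetical fallback sketch on the consumer side (the routing helper below is not part of this package; provider construction follows the package readme examples):

```typescript
import { OpenAiProvider, PerplexityProvider } from '@push.rocks/smartai';

// Hypothetical consumer helper: providers without image support throw,
// so fall back to the OpenAI provider when the preferred one rejects.
async function generateWithFallback(prompt: string) {
  const perplexity = new PerplexityProvider({ perplexityToken: 'your-api-key' });
  const openai = new OpenAiProvider({ openaiToken: 'your-api-key' });
  await openai.start();

  try {
    return await perplexity.imageGenerate({ prompt });
  } catch {
    // Perplexity's and xAI's stubs always throw "not supported".
    return await openai.imageGenerate({ prompt });
  }
}
```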
@@ -1,177 +0,0 @@
- # SmartAI Research API Implementation
-
- This document describes the new research capabilities added to the SmartAI library, enabling web search and deep research features for OpenAI and Anthropic providers.
-
- ## Features Added
-
- ### 1. Research Method Interface
-
- Added a new `research()` method to the `MultiModalModel` abstract class with the following interfaces:
-
- ```typescript
- interface ResearchOptions {
-   query: string;
-   searchDepth?: 'basic' | 'advanced' | 'deep';
-   maxSources?: number;
-   includeWebSearch?: boolean;
-   background?: boolean;
- }
-
- interface ResearchResponse {
-   answer: string;
-   sources: Array<{
-     url: string;
-     title: string;
-     snippet: string;
-   }>;
-   searchQueries?: string[];
-   metadata?: any;
- }
- ```
-
- ### 2. OpenAI Provider Research Implementation
-
- The OpenAI provider now supports:
- - **Deep Research API** with models:
-   - `o3-deep-research-2025-06-26` (comprehensive analysis)
-   - `o4-mini-deep-research-2025-06-26` (lightweight, faster)
- - **Web Search** for standard models (gpt-5, o3, o3-pro, o4-mini)
- - **Background processing** for async deep research tasks
-
- ### 3. Anthropic Provider Research Implementation
-
- The Anthropic provider now supports:
- - **Web Search API** with Claude models
- - **Domain filtering** (allow/block lists)
- - **Progressive searches** for comprehensive research
- - **Citation extraction** from responses
-
- ### 4. Perplexity Provider Research Implementation
-
- The Perplexity provider implements research using:
- - **Sonar models** for standard searches
- - **Sonar Pro** for deep research
- - Built-in citation support
-
- ### 5. Other Providers
-
- Added research method stubs to:
- - Groq Provider
- - Ollama Provider
- - xAI Provider
- - Exo Provider
-
- These providers throw a "not yet supported" error when research is called, maintaining interface compatibility.
-
- ## Usage Examples
-
- ### Basic Research with OpenAI
-
- ```typescript
- import { OpenAiProvider } from '@push.rocks/smartai';
-
- const openai = new OpenAiProvider({
-   openaiToken: 'your-api-key',
-   researchModel: 'o4-mini-deep-research-2025-06-26'
- });
-
- await openai.start();
-
- const result = await openai.research({
-   query: 'What are the latest developments in quantum computing?',
-   searchDepth: 'basic',
-   includeWebSearch: true
- });
-
- console.log(result.answer);
- console.log('Sources:', result.sources);
- ```
-
- ### Deep Research with OpenAI
-
- ```typescript
- const deepResult = await openai.research({
-   query: 'Comprehensive analysis of climate change mitigation strategies',
-   searchDepth: 'deep',
-   background: true
- });
- ```
-
- ### Research with Anthropic
-
- ```typescript
- import { AnthropicProvider } from '@push.rocks/smartai';
-
- const anthropic = new AnthropicProvider({
-   anthropicToken: 'your-api-key',
-   enableWebSearch: true,
-   searchDomainAllowList: ['nature.com', 'science.org']
- });
-
- await anthropic.start();
-
- const result = await anthropic.research({
-   query: 'Latest breakthroughs in CRISPR gene editing',
-   searchDepth: 'advanced'
- });
- ```
-
- ### Research with Perplexity
-
- ```typescript
- import { PerplexityProvider } from '@push.rocks/smartai';
-
- const perplexity = new PerplexityProvider({
-   perplexityToken: 'your-api-key'
- });
-
- const result = await perplexity.research({
-   query: 'Current state of autonomous vehicle technology',
-   searchDepth: 'deep' // Uses Sonar Pro model
- });
- ```
-
- ## Configuration Options
-
- ### OpenAI Provider
- - `researchModel`: Specify deep research model (default: `o4-mini-deep-research-2025-06-26`)
- - `enableWebSearch`: Enable web search for standard models
-
- ### Anthropic Provider
- - `enableWebSearch`: Enable web search capabilities
- - `searchDomainAllowList`: Array of allowed domains
- - `searchDomainBlockList`: Array of blocked domains
-
- ## API Pricing
-
- - **OpenAI Deep Research**: $10 per 1,000 calls
- - **Anthropic Web Search**: $10 per 1,000 searches + standard token costs
- - **Perplexity Sonar**: $5 per 1,000 searches (Sonar Pro)
-
- ## Testing
-
- Run the test suite:
-
- ```bash
- pnpm test test/test.research.ts
- ```
-
- All providers have been tested to ensure:
- - Research methods are properly exposed
- - Interfaces are correctly typed
- - Unsupported providers throw appropriate errors
-
- ## Next Steps
-
- Future enhancements could include:
- 1. Implementing Google Gemini Grounding API support
- 2. Adding Brave Search API integration
- 3. Implementing retry logic for rate limits
- 4. Adding caching for repeated queries
- 5. Supporting batch research operations
-
- ## Notes
-
- - The implementation maintains backward compatibility
- - All existing methods continue to work unchanged
- - Research capabilities are optional and don't affect existing functionality