@everworker/oneringai 0.1.4 → 0.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -27,7 +27,8 @@
27
27
  - [13. Streaming](#13-streaming)
28
28
  - [14. OAuth for External APIs](#14-oauth-for-external-apis)
29
29
  - [15. Developer Tools](#15-developer-tools)
30
- - [16. External API Integration](#16-external-api-integration) — Scoped Registry, Vendor Templates, Tool Discovery
30
+ - [17. Document Reader](#17-document-reader-new) — PDF, DOCX, XLSX, PPTX, CSV, HTML, images
31
+ - [18. External API Integration](#18-external-api-integration) — Scoped Registry, Vendor Templates, Tool Discovery
31
32
  - [MCP Integration](#mcp-model-context-protocol-integration)
32
33
  - [Documentation](#documentation)
33
34
  - [Examples](#examples)
@@ -67,14 +68,18 @@
67
68
  - 🎛️ **Dynamic Tool Management** - Enable/disable tools at runtime, namespaces, priority-based selection
68
69
  - 🔌 **Tool Execution Plugins** - NEW: Pluggable pipeline for logging, analytics, UI updates, custom behavior
69
70
  - 💾 **Session Persistence** - Save and resume conversations with full state restoration
71
+ - 👤 **Multi-User Support** - Set `userId` once, flows automatically to all tool executions and session metadata
72
+ - 🔒 **Connector Allowlist** - Restrict agents to a named subset of connectors, composable with access policies
70
73
  - 🤖 **Universal Agent** - ⚠️ *Deprecated* - Use `Agent` with plugins instead
71
74
  - 🤖 **Task Agents** - ⚠️ *Deprecated* - Use `Agent` with `WorkingMemoryPluginNextGen`
72
75
  - 🔬 **Research Agent** - ⚠️ *Deprecated* - Use `Agent` with search tools
73
76
  - 🎯 **Context Management** - Algorithmic compaction with tool-result-to-memory offloading
74
- - 📌 **InContextMemory** - NEW: Live key-value storage directly in LLM context for instant access
77
+ - 📌 **InContextMemory** - Live key-value storage directly in LLM context with optional UI display (`showInUI`)
75
78
  - 📝 **Persistent Instructions** - NEW: Agent-level custom instructions that persist across sessions on disk
76
79
  - 🛠️ **Agentic Workflows** - Built-in tool calling and multi-turn conversations
77
80
  - 🔧 **Developer Tools** - NEW: Filesystem and shell tools for coding assistants (read, write, edit, grep, glob, bash)
81
+ - 🖥️ **Desktop Automation** - NEW: OS-level computer use — screenshot, mouse, keyboard, and window control for vision-driven agent loops
82
+ - 📄 **Document Reader** - NEW: Universal file-to-text converter — PDF, DOCX, XLSX, PPTX, CSV, HTML, images auto-converted to markdown
78
83
  - 🔌 **MCP Integration** - NEW: Model Context Protocol client for seamless tool discovery from local and remote servers
79
84
  - 👁️ **Vision Support** - Analyze images with AI across all providers
80
85
  - 📋 **Clipboard Integration** - Paste screenshots directly (like Claude Code!)
@@ -84,6 +89,8 @@
84
89
  - 🔄 **Streaming** - Real-time responses with event streams
85
90
  - 📝 **TypeScript** - Full type safety and IntelliSense support
86
91
 
92
+ > **v0.2.0 — Multi-User Support:** Set `userId` once on an agent and it automatically flows to all tool executions, OAuth token retrieval, session metadata, and connector scoping. Combine with `connectors` allowlist and access policies for complete multi-tenant isolation. See [Multi-User Support](#multi-user-support-userid) and [Connector Allowlist](#connector-allowlist-connectors) in the User Guide.
93
+
87
94
  ## Quick Start
88
95
 
89
96
  ### Installation
@@ -250,6 +257,55 @@ const veoJob = await googleVideo.generate({
250
257
  });
251
258
  ```
252
259
 
260
+ ### Document Reader (NEW)
261
+
262
+ Read any document format — agents automatically get markdown text from PDFs, Word docs, spreadsheets, and more:
263
+
264
+ ```typescript
265
+ import { Agent, developerTools } from '@everworker/oneringai';
266
+
267
+ const agent = Agent.create({
268
+ connector: 'openai',
269
+ model: 'gpt-4',
270
+ tools: developerTools,
271
+ });
272
+
273
+ // read_file auto-converts binary documents to markdown
274
+ await agent.run('Read /path/to/report.pdf and summarize the key findings');
275
+ await agent.run('Read /path/to/data.xlsx and describe the trends');
276
+ await agent.run('Read /path/to/presentation.pptx and list all slides');
277
+ ```
278
+
279
+ **Programmatic usage:**
280
+
281
+ ```typescript
282
+ import { DocumentReader, readDocumentAsContent } from '@everworker/oneringai';
283
+
284
+ // Read any file to markdown pieces
285
+ const reader = DocumentReader.create();
286
+ const result = await reader.read('/path/to/report.pdf');
287
+ console.log(result.pieces); // DocumentPiece[] (text + images)
288
+
289
+ // One-call conversion to LLM Content[] (for multimodal input)
290
+ const content = await readDocumentAsContent('/path/to/slides.pptx', {
291
+ imageFilter: { minWidth: 100, minHeight: 100 },
292
+ imageDetail: 'auto',
293
+ });
294
+
295
+ const response = await agent.run([
296
+ { type: 'input_text', text: 'Analyze this document:' },
297
+ ...content,
298
+ ]);
299
+ ```
300
+
301
+ **Supported Formats:**
302
+ - **Office**: DOCX, PPTX, ODT, ODP, ODS, RTF (via `officeparser`)
303
+ - **Spreadsheets**: XLSX, CSV (via `exceljs`)
304
+ - **PDF** (via `unpdf`)
305
+ - **HTML** (via Readability + Turndown)
306
+ - **Text**: TXT, MD, JSON, XML, YAML
307
+ - **Images**: PNG, JPG, GIF, WEBP, SVG (pass-through as base64)
308
+
253
309
  ### Web Search
254
310
 
255
311
  Connector-based web search with multiple providers:
@@ -359,6 +415,8 @@ const storage = createFileContextStorage('my-assistant');
359
415
  const agent = Agent.create({
360
416
  connector: 'openai',
361
417
  model: 'gpt-4',
418
+ userId: 'user-123', // Flows to all tool executions automatically
419
+ connectors: ['github', 'slack'], // Only these connectors visible to tools
362
420
  tools: [weatherTool, emailTool],
363
421
  context: {
364
422
  features: {
@@ -711,6 +769,9 @@ const plugin = ctx.getPlugin('in_context_memory');
711
769
  plugin.set('current_state', 'Task processing state', { step: 2, status: 'active' });
712
770
  plugin.set('user_prefs', 'User preferences', { verbose: true }, 'high');
713
771
 
772
+ // Store data with UI display - shown in the host app's sidebar panel
773
+ plugin.set('dashboard', 'Progress dashboard', '## Progress\n- [x] Step 1\n- [ ] Step 2', 'normal', true);
774
+
714
775
  // LLM can use context_set/context_delete/context_list tools
715
776
  // Or access directly via plugin API
716
777
  const state = plugin.get('current_state'); // { step: 2, status: 'active' }
@@ -720,7 +781,9 @@ const state = plugin.get('current_state'); // { step: 2, status: 'active' }
720
781
  - **WorkingMemory**: External storage + index → requires `memory_retrieve()` for values
721
782
  - **InContextMemory**: Full values in context → instant access, no retrieval needed
722
783
 
723
- **Use cases:** Session state, user preferences, counters, flags, small accumulated results.
784
+ **UI Display (`showInUI`):** Entries with `showInUI: true` are displayed in the host application's sidebar panel with full markdown rendering (code blocks, tables, charts, diagrams, etc.). The LLM sets this via the `context_set` tool. Users can also pin specific entries to always display them regardless of the agent's setting. See the [User Guide](./USER_GUIDE.md#ui-display-showinui) for details.
785
+
786
+ **Use cases:** Session state, user preferences, counters, flags, small accumulated results, live dashboards.
724
787
 
725
788
  ### 9. Persistent Instructions
726
789
 
@@ -952,7 +1015,108 @@ await agent.run('Run npm test and report any failures');
952
1015
  - Timeout protection (default 2 min)
953
1016
  - Output truncation for large outputs
954
1017
 
955
- ### 16. External API Integration
1018
+ ### 16. Desktop Automation Tools (NEW)
1019
+
1020
+ OS-level desktop automation for building "computer use" agents — screenshot the screen, send to a vision model, receive tool calls (click, type, etc.), execute them, repeat:
1021
+
1022
+ ```typescript
1023
+ import { Agent, desktopTools } from '@everworker/oneringai';
1024
+
1025
+ const agent = Agent.create({
1026
+ connector: 'openai',
1027
+ model: 'gpt-4',
1028
+ tools: desktopTools, // All 11 desktop tools
1029
+ });
1030
+
1031
+ // Agent can now see and interact with the desktop:
1032
+ await agent.run('Take a screenshot and describe what you see');
1033
+ await agent.run('Open Safari and search for "weather forecast"');
1034
+ ```
1035
+
1036
+ **Available Tools:**
1037
+ - **desktop_screenshot** - Capture full screen or region (returns image to vision model)
1038
+ - **desktop_mouse_move** - Move cursor to position
1039
+ - **desktop_mouse_click** - Click (left/right/middle, single/double/triple)
1040
+ - **desktop_mouse_drag** - Drag from one position to another
1041
+ - **desktop_mouse_scroll** - Scroll wheel (vertical and horizontal)
1042
+ - **desktop_get_cursor** - Get current cursor position
1043
+ - **desktop_keyboard_type** - Type text
1044
+ - **desktop_keyboard_key** - Press shortcuts (e.g., `ctrl+c`, `cmd+shift+s`, `enter`)
1045
+ - **desktop_get_screen_size** - Get screen dimensions and scale factor
1046
+ - **desktop_window_list** - List visible windows
1047
+ - **desktop_window_focus** - Bring a window to the foreground
1048
+
1049
+ **Key Design:**
1050
+ - All coordinates are in **physical pixel space** (same as screenshot pixels) — no manual Retina scaling needed
1051
+ - Screenshots use the `__images` convention for automatic multimodal handling across all providers (Anthropic, OpenAI, Google)
1052
+ - Requires `@nut-tree-fork/nut-js` as an optional peer dependency: `npm install @nut-tree-fork/nut-js`
1053
+
1054
+ ### 17. Document Reader (NEW)
1055
+
1056
+ Universal file-to-LLM-content converter. Reads arbitrary document formats and produces clean markdown text with optional image extraction:
1057
+
1058
+ ```typescript
1059
+ import { DocumentReader, mergeTextPieces } from '@everworker/oneringai';
1060
+
1061
+ const reader = DocumentReader.create({
1062
+ defaults: {
1063
+ maxTokens: 50_000,
1064
+ extractImages: true,
1065
+ imageFilter: { minWidth: 100, minHeight: 100 },
1066
+ },
1067
+ });
1068
+
1069
+ // Read from file path, URL, Buffer, or Blob
1070
+ const result = await reader.read('/path/to/report.pdf');
1071
+ const urlResult = await reader.read('https://example.com/doc.xlsx');
1072
+ const bufferResult = await reader.read({ type: 'buffer', buffer: myBuffer, filename: 'doc.docx' });
1073
+
1074
+ // Get merged markdown text
1075
+ const markdown = mergeTextPieces(result.pieces);
1076
+
1077
+ // Metadata
1078
+ console.log(result.metadata.format); // 'pdf'
1079
+ console.log(result.metadata.estimatedTokens); // 12500
1080
+ console.log(result.metadata.processingTimeMs); // 234
1081
+ ```
1082
+
1083
+ **Automatic Integration — No Code Changes Needed:**
1084
+ - **`read_file` tool** — Agents calling `read_file` on a PDF, DOCX, or XLSX get markdown text automatically
1085
+ - **`web_fetch` tool** — Documents downloaded from URLs are auto-converted to markdown
1086
+
1087
+ **Content Bridge for Multimodal Input:**
1088
+
1089
+ ```typescript
1090
+ import { readDocumentAsContent } from '@everworker/oneringai';
1091
+
1092
+ // Convert document directly to Content[] for LLM input
1093
+ const content = await readDocumentAsContent('/path/to/slides.pptx', {
1094
+ extractImages: true,
1095
+ imageDetail: 'auto',
1096
+ maxImages: 20,
1097
+ });
1098
+
1099
+ // Use in agent.run() with text + images
1100
+ await agent.run([
1101
+ { type: 'input_text', text: 'Analyze this presentation:' },
1102
+ ...content,
1103
+ ]);
1104
+ ```
1105
+
1106
+ **Pluggable Architecture:**
1107
+ - 6 built-in format handlers (Office, Excel, PDF, HTML, Text, Image)
1108
+ - 3 default transformers (header, table formatting, truncation)
1109
+ - Custom handlers and transformers via `DocumentReader.create({ handlers, ... })`
1110
+ - All heavy dependencies lazy-loaded (officeparser, exceljs, unpdf)
1111
+
1112
+ **Image Filtering:**
1113
+ - Configurable min dimensions, min size, max count, pattern exclusions
1114
+ - Automatically removes junk images (logos, icons, tiny backgrounds)
1115
+ - Applied both at extraction time and at content conversion time
1116
+
1117
+ See the [User Guide](./USER_GUIDE.md#document-reader) for complete API reference and configuration options.
1118
+
1119
+ ### 18. External API Integration
956
1120
 
957
1121
  Connect your AI agents to 35+ external services with enterprise-grade resilience:
958
1122
 
@@ -977,10 +1141,11 @@ Connector.create({
977
1141
  // search_files, search_code, read_file, get_pr, pr_files, pr_comments, create_pr
978
1142
  const tools = ConnectorTools.for('github');
979
1143
 
980
- // Use with an agent
1144
+ // Use with an agent — userId flows to all tools automatically
981
1145
  const agent = Agent.create({
982
1146
  connector: 'openai',
983
1147
  model: 'gpt-4',
1148
+ userId: 'user-123', // All tool API calls use this user's OAuth tokens
984
1149
  tools: tools,
985
1150
  });
986
1151
 
@@ -1303,4 +1468,4 @@ MIT License - See [LICENSE](./LICENSE) file.
1303
1468
 
1304
1469
  ---
1305
1470
 
1306
- **Version:** 0.1.4 | **Last Updated:** 2026-02-08 | **[User Guide](./USER_GUIDE.md)** | **[API Reference](./API_REFERENCE.md)** | **[Changelog](./CHANGELOG.md)**
1471
+ **Version:** 0.2.1 | **Last Updated:** 2026-02-11 | **[User Guide](./USER_GUIDE.md)** | **[API Reference](./API_REFERENCE.md)** | **[Changelog](./CHANGELOG.md)**
@@ -1,5 +1,4 @@
1
1
  import { V as Vendor } from './Vendor-DYh_bzwo.js';
2
- import { I as IProvider } from './IProvider-BP49c93d.js';
3
2
 
4
3
  /**
5
4
  * Connector - Represents authenticated connection to ANY API
@@ -483,336 +482,24 @@ declare class Connector {
483
482
  }
484
483
 
485
484
  /**
486
- * Shared types used across all multimodal capabilities
487
- * This file provides the foundation for Image, Audio, and Video model registries
485
+ * Base provider interface
488
486
  */
489
-
490
- /**
491
- * Aspect ratios - normalized across all visual modalities (images, video)
492
- */
493
- type AspectRatio$1 = '1:1' | '16:9' | '9:16' | '4:3' | '3:4' | '21:9' | '3:2' | '2:3';
494
- /**
495
- * Quality levels - normalized across vendors
496
- * Providers map these to vendor-specific quality settings
497
- */
498
- type QualityLevel = 'draft' | 'standard' | 'high' | 'ultra';
499
- /**
500
- * Audio output formats
501
- */
502
- type AudioFormat = 'mp3' | 'opus' | 'aac' | 'flac' | 'wav' | 'pcm' | 'ogg';
503
- /**
504
- * Output format preference for media
505
- */
506
- type OutputFormat = 'url' | 'base64' | 'buffer';
507
- /**
508
- * Source links for model documentation and maintenance
509
- * Used to track where information came from and when it was last verified
510
- */
511
- interface ISourceLinks {
512
- /** Official documentation URL */
513
- documentation: string;
514
- /** Pricing page URL */
515
- pricing?: string;
516
- /** API reference URL */
517
- apiReference?: string;
518
- /** Additional reference (e.g., blog post, announcement) */
519
- additional?: string;
520
- /** Last verified date (YYYY-MM-DD) */
521
- lastVerified: string;
522
- }
523
- /**
524
- * Vendor-specific option schema for validation and documentation
525
- * Used to describe vendor-specific options that fall outside semantic options
526
- */
527
- interface VendorOptionSchema {
528
- /** Data type of the option */
529
- type: 'string' | 'number' | 'boolean' | 'enum' | 'array';
530
- /** Description of the option */
531
- description: string;
532
- /** Whether the option is required */
533
- required?: boolean;
534
- /** UI display label */
535
- label?: string;
536
- /** Valid values for enum/string types */
537
- enum?: string[];
538
- /** Default value */
539
- default?: unknown;
540
- /** Minimum value for numbers */
541
- min?: number;
542
- /** Maximum value for numbers */
543
- max?: number;
544
- /** Step value for number sliders */
545
- step?: number;
546
- /** UI control type hint */
547
- controlType?: 'select' | 'radio' | 'slider' | 'checkbox' | 'text' | 'textarea';
548
- }
549
- /**
550
- * Base model description - shared by all registries
551
- * Every model registry (Image, TTS, STT, Video) extends this
552
- */
553
- interface IBaseModelDescription {
554
- /** Model identifier (e.g., "dall-e-3", "tts-1") */
555
- name: string;
556
- /** Display name for UI (e.g., "DALL-E 3", "TTS-1") */
557
- displayName: string;
558
- /** Vendor/provider */
559
- provider: Vendor;
560
- /** Model description */
561
- description?: string;
562
- /** Whether the model is currently available */
563
- isActive: boolean;
564
- /** Release date (YYYY-MM-DD) */
565
- releaseDate?: string;
566
- /** Deprecation date if scheduled (YYYY-MM-DD) */
567
- deprecationDate?: string;
568
- /** Documentation/pricing links for maintenance */
569
- sources: ISourceLinks;
570
- }
571
-
572
- /**
573
- * Image generation provider interface
574
- */
575
-
576
- interface ImageGenerateOptions {
577
- model: string;
578
- prompt: string;
579
- size?: string;
580
- aspectRatio?: string;
581
- quality?: 'standard' | 'hd' | 'low' | 'medium' | 'high' | 'auto';
582
- style?: 'vivid' | 'natural';
583
- n?: number;
584
- response_format?: 'url' | 'b64_json';
487
+ interface ProviderCapabilities {
488
+ text: boolean;
489
+ images: boolean;
490
+ videos: boolean;
491
+ audio: boolean;
492
+ /** Optional feature flags for specific capabilities */
493
+ features?: Record<string, boolean>;
585
494
  }
586
- interface ImageEditOptions {
587
- model: string;
588
- image: Buffer | string;
589
- prompt: string;
590
- mask?: Buffer | string;
591
- size?: string;
592
- n?: number;
593
- response_format?: 'url' | 'b64_json';
594
- }
595
- interface ImageVariationOptions {
596
- model: string;
597
- image: Buffer | string;
598
- n?: number;
599
- size?: string;
600
- response_format?: 'url' | 'b64_json';
601
- }
602
- interface ImageResponse {
603
- created: number;
604
- data: Array<{
605
- url?: string;
606
- b64_json?: string;
607
- revised_prompt?: string;
608
- }>;
609
- }
610
- interface IImageProvider extends IProvider {
611
- /**
612
- * Generate images from text prompt
613
- */
614
- generateImage(options: ImageGenerateOptions): Promise<ImageResponse>;
615
- /**
616
- * Edit an existing image (optional - not all providers support)
617
- */
618
- editImage?(options: ImageEditOptions): Promise<ImageResponse>;
619
- /**
620
- * Create variations of an image (optional)
621
- */
622
- createVariation?(options: ImageVariationOptions): Promise<ImageResponse>;
623
- /**
624
- * List available models
625
- */
626
- listModels?(): Promise<string[]>;
627
- }
628
-
629
- /**
630
- * Options for creating an ImageGeneration instance
631
- */
632
- interface ImageGenerationCreateOptions {
633
- /** Connector name or instance */
634
- connector: string | Connector;
635
- }
636
- /**
637
- * Simplified options for quick generation
638
- */
639
- interface SimpleGenerateOptions {
640
- /** Text prompt describing the image */
641
- prompt: string;
642
- /** Model to use (defaults to vendor's best model) */
643
- model?: string;
644
- /** Image size */
645
- size?: string;
646
- /** Quality setting */
647
- quality?: 'standard' | 'hd';
648
- /** Style setting (DALL-E 3 only) */
649
- style?: 'vivid' | 'natural';
650
- /** Number of images to generate */
651
- n?: number;
652
- /** Response format */
653
- response_format?: 'url' | 'b64_json';
654
- }
655
- /**
656
- * ImageGeneration capability class
657
- */
658
- declare class ImageGeneration {
659
- private provider;
660
- private connector;
661
- private defaultModel;
662
- private constructor();
663
- /**
664
- * Create an ImageGeneration instance
665
- */
666
- static create(options: ImageGenerationCreateOptions): ImageGeneration;
667
- /**
668
- * Generate images from a text prompt
669
- */
670
- generate(options: SimpleGenerateOptions): Promise<ImageResponse>;
671
- /**
672
- * Edit an existing image
673
- * Note: Not all models/vendors support this
674
- */
675
- edit(options: ImageEditOptions): Promise<ImageResponse>;
676
- /**
677
- * Create variations of an existing image
678
- * Note: Only DALL-E 2 supports this
679
- */
680
- createVariation(options: ImageVariationOptions): Promise<ImageResponse>;
681
- /**
682
- * List available models for this provider
683
- */
684
- listModels(): Promise<string[]>;
685
- /**
686
- * Get information about a specific model
687
- */
688
- getModelInfo(modelName: string): IImageModelDescription | undefined;
689
- /**
690
- * Get the underlying provider
691
- */
692
- getProvider(): IImageProvider;
693
- /**
694
- * Get the current connector
695
- */
696
- getConnector(): Connector;
697
- /**
698
- * Get the default model for this vendor
699
- */
700
- private getDefaultModel;
495
+ interface IProvider {
496
+ readonly name: string;
497
+ readonly vendor?: string;
498
+ readonly capabilities: ProviderCapabilities;
701
499
  /**
702
- * Get the default edit model for this vendor
500
+ * Validate that the provider configuration is correct
703
501
  */
704
- private getEditModel;
502
+ validateConfig(): Promise<boolean>;
705
503
  }
706
504
 
707
- /**
708
- * Image generation model registry with comprehensive metadata
709
- */
710
-
711
- /**
712
- * Supported image sizes by model
713
- */
714
- type ImageSize = '256x256' | '512x512' | '1024x1024' | '1024x1536' | '1536x1024' | '1792x1024' | '1024x1792' | 'auto';
715
- /**
716
- * Supported aspect ratios
717
- */
718
- type AspectRatio = '1:1' | '3:4' | '4:3' | '9:16' | '16:9' | '3:2' | '2:3';
719
- /**
720
- * Image model capabilities
721
- */
722
- interface ImageModelCapabilities {
723
- /** Supported image sizes */
724
- sizes: readonly ImageSize[];
725
- /** Supported aspect ratios (Google) */
726
- aspectRatios?: readonly AspectRatio[];
727
- /** Maximum number of images per request */
728
- maxImagesPerRequest: number;
729
- /** Supported output formats */
730
- outputFormats: readonly string[];
731
- /** Feature support flags */
732
- features: {
733
- /** Text-to-image generation */
734
- generation: boolean;
735
- /** Image editing/inpainting */
736
- editing: boolean;
737
- /** Image variations */
738
- variations: boolean;
739
- /** Style control */
740
- styleControl: boolean;
741
- /** Quality control (standard/hd) */
742
- qualityControl: boolean;
743
- /** Transparent backgrounds */
744
- transparency: boolean;
745
- /** Prompt revision/enhancement */
746
- promptRevision: boolean;
747
- };
748
- /** Model limits */
749
- limits: {
750
- /** Maximum prompt length in characters */
751
- maxPromptLength: number;
752
- /** Rate limit (requests per minute) */
753
- maxRequestsPerMinute?: number;
754
- };
755
- /** Vendor-specific options schema */
756
- vendorOptions?: Record<string, VendorOptionSchema>;
757
- }
758
- /**
759
- * Image model pricing
760
- */
761
- interface ImageModelPricing {
762
- /** Cost per image at standard quality */
763
- perImageStandard?: number;
764
- /** Cost per image at HD quality */
765
- perImageHD?: number;
766
- /** Cost per image (flat rate) */
767
- perImage?: number;
768
- currency: 'USD';
769
- }
770
- /**
771
- * Complete image model description
772
- */
773
- interface IImageModelDescription extends IBaseModelDescription {
774
- capabilities: ImageModelCapabilities;
775
- pricing?: ImageModelPricing;
776
- }
777
- declare const IMAGE_MODELS: {
778
- readonly openai: {
779
- /** GPT-Image-1: Latest OpenAI image model with best quality */
780
- readonly GPT_IMAGE_1: "gpt-image-1";
781
- /** DALL-E 3: High quality image generation */
782
- readonly DALL_E_3: "dall-e-3";
783
- /** DALL-E 2: Fast, supports editing and variations */
784
- readonly DALL_E_2: "dall-e-2";
785
- };
786
- readonly google: {
787
- /** Imagen 4.0: Latest Google image generation model */
788
- readonly IMAGEN_4_GENERATE: "imagen-4.0-generate-001";
789
- /** Imagen 4.0 Ultra: Highest quality */
790
- readonly IMAGEN_4_ULTRA: "imagen-4.0-ultra-generate-001";
791
- /** Imagen 4.0 Fast: Optimized for speed */
792
- readonly IMAGEN_4_FAST: "imagen-4.0-fast-generate-001";
793
- };
794
- readonly grok: {
795
- /** Grok Imagine Image: xAI image generation with editing support */
796
- readonly GROK_IMAGINE_IMAGE: "grok-imagine-image";
797
- /** Grok 2 Image: xAI image generation (text-only input) */
798
- readonly GROK_2_IMAGE_1212: "grok-2-image-1212";
799
- };
800
- };
801
- /**
802
- * Complete image model registry
803
- * Last full audit: January 2026
804
- */
805
- declare const IMAGE_MODEL_REGISTRY: Record<string, IImageModelDescription>;
806
- declare const getImageModelInfo: (modelName: string) => IImageModelDescription | undefined;
807
- declare const getImageModelsByVendor: (vendor: Vendor) => IImageModelDescription[];
808
- declare const getActiveImageModels: () => IImageModelDescription[];
809
- /**
810
- * Get image models that support a specific feature
811
- */
812
- declare function getImageModelsWithFeature(feature: keyof IImageModelDescription['capabilities']['features']): IImageModelDescription[];
813
- /**
814
- * Calculate estimated cost for image generation
815
- */
816
- declare function calculateImageCost(modelName: string, imageCount: number, quality?: 'standard' | 'hd'): number | null;
817
-
818
- export { type AudioFormat as A, type ImageResponse as B, type ConnectorAccessContext as C, type AspectRatio$1 as D, type OutputFormat as E, type ISourceLinks as F, DEFAULT_CONNECTOR_TIMEOUT as G, DEFAULT_MAX_RETRIES as H, type IConnectorRegistry as I, type JWTConnectorAuth as J, DEFAULT_RETRYABLE_STATUSES as K, DEFAULT_BASE_DELAY_MS as L, DEFAULT_MAX_DELAY_MS as M, type OAuthConnectorAuth as O, type QualityLevel as Q, type StoredToken as S, type VendorOptionSchema as V, type IConnectorAccessPolicy as a, Connector as b, type IBaseModelDescription as c, type IImageProvider as d, type ConnectorFetchOptions as e, type ITokenStorage as f, type ConnectorConfig as g, type ConnectorAuth as h, type ConnectorConfigResult as i, ImageGeneration as j, type ImageGenerationCreateOptions as k, type SimpleGenerateOptions as l, type APIKeyConnectorAuth as m, type IImageModelDescription as n, type ImageModelCapabilities as o, type ImageModelPricing as p, IMAGE_MODELS as q, IMAGE_MODEL_REGISTRY as r, getImageModelInfo as s, getImageModelsByVendor as t, getActiveImageModels as u, getImageModelsWithFeature as v, calculateImageCost as w, type ImageGenerateOptions as x, type ImageEditOptions as y, type ImageVariationOptions as z };
505
+ export { type APIKeyConnectorAuth as A, type ConnectorAccessContext as C, DEFAULT_CONNECTOR_TIMEOUT as D, type IConnectorRegistry as I, type JWTConnectorAuth as J, type OAuthConnectorAuth as O, type ProviderCapabilities as P, type StoredToken as S, type IConnectorAccessPolicy as a, Connector as b, type IProvider as c, type ConnectorFetchOptions as d, type ITokenStorage as e, type ConnectorConfig as f, type ConnectorAuth as g, type ConnectorConfigResult as h, DEFAULT_MAX_RETRIES as i, DEFAULT_RETRYABLE_STATUSES as j, DEFAULT_BASE_DELAY_MS as k, DEFAULT_MAX_DELAY_MS as l };