@everworker/oneringai 0.2.0 → 0.2.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +156 -3
- package/dist/capabilities/agents/index.d.cts +1 -1
- package/dist/capabilities/agents/index.d.ts +1 -1
- package/dist/capabilities/images/index.cjs.map +1 -1
- package/dist/capabilities/images/index.js.map +1 -1
- package/dist/{index-MJ14lkui.d.cts → index-D62LXWdW.d.cts} +9 -0
- package/dist/{index-B5UaeEvK.d.ts → index-DVb6vfA3.d.ts} +9 -0
- package/dist/index.cjs +2829 -585
- package/dist/index.cjs.map +1 -1
- package/dist/index.d.cts +836 -9
- package/dist/index.d.ts +836 -9
- package/dist/index.js +2795 -586
- package/dist/index.js.map +1 -1
- package/package.json +14 -1
package/README.md
CHANGED
|
@@ -27,7 +27,8 @@
|
|
|
27
27
|
- [13. Streaming](#13-streaming)
|
|
28
28
|
- [14. OAuth for External APIs](#14-oauth-for-external-apis)
|
|
29
29
|
- [15. Developer Tools](#15-developer-tools)
|
|
30
|
-
- [16. External API Integration](#16-external-api-integration)
|
|
30
|
+
- [17. Document Reader](#17-document-reader-new) — PDF, DOCX, XLSX, PPTX, CSV, HTML, images
|
|
31
|
+
- [18. External API Integration](#18-external-api-integration) — Scoped Registry, Vendor Templates, Tool Discovery
|
|
31
32
|
- [MCP Integration](#mcp-model-context-protocol-integration)
|
|
32
33
|
- [Documentation](#documentation)
|
|
33
34
|
- [Examples](#examples)
|
|
@@ -77,6 +78,8 @@
|
|
|
77
78
|
- 📝 **Persistent Instructions** - NEW: Agent-level custom instructions that persist across sessions on disk
|
|
78
79
|
- 🛠️ **Agentic Workflows** - Built-in tool calling and multi-turn conversations
|
|
79
80
|
- 🔧 **Developer Tools** - NEW: Filesystem and shell tools for coding assistants (read, write, edit, grep, glob, bash)
|
|
81
|
+
- 🖥️ **Desktop Automation** - NEW: OS-level computer use — screenshot, mouse, keyboard, and window control for vision-driven agent loops
|
|
82
|
+
- 📄 **Document Reader** - NEW: Universal file-to-text converter — PDF, DOCX, XLSX, PPTX, CSV, HTML, images auto-converted to markdown
|
|
80
83
|
- 🔌 **MCP Integration** - NEW: Model Context Protocol client for seamless tool discovery from local and remote servers
|
|
81
84
|
- 👁️ **Vision Support** - Analyze images with AI across all providers
|
|
82
85
|
- 📋 **Clipboard Integration** - Paste screenshots directly (like Claude Code!)
|
|
@@ -254,6 +257,55 @@ const veoJob = await googleVideo.generate({
|
|
|
254
257
|
});
|
|
255
258
|
```
|
|
256
259
|
|
|
260
|
+
### Document Reader (NEW)
|
|
261
|
+
|
|
262
|
+
Read any document format — agents automatically get markdown text from PDFs, Word docs, spreadsheets, and more:
|
|
263
|
+
|
|
264
|
+
```typescript
|
|
265
|
+
import { Agent, developerTools } from '@everworker/oneringai';
|
|
266
|
+
|
|
267
|
+
const agent = Agent.create({
|
|
268
|
+
connector: 'openai',
|
|
269
|
+
model: 'gpt-4',
|
|
270
|
+
tools: developerTools,
|
|
271
|
+
});
|
|
272
|
+
|
|
273
|
+
// read_file auto-converts binary documents to markdown
|
|
274
|
+
await agent.run('Read /path/to/report.pdf and summarize the key findings');
|
|
275
|
+
await agent.run('Read /path/to/data.xlsx and describe the trends');
|
|
276
|
+
await agent.run('Read /path/to/presentation.pptx and list all slides');
|
|
277
|
+
```
|
|
278
|
+
|
|
279
|
+
**Programmatic usage:**
|
|
280
|
+
|
|
281
|
+
```typescript
|
|
282
|
+
import { DocumentReader, readDocumentAsContent } from '@everworker/oneringai';
|
|
283
|
+
|
|
284
|
+
// Read any file to markdown pieces
|
|
285
|
+
const reader = DocumentReader.create();
|
|
286
|
+
const result = await reader.read('/path/to/report.pdf');
|
|
287
|
+
console.log(result.pieces); // DocumentPiece[] (text + images)
|
|
288
|
+
|
|
289
|
+
// One-call conversion to LLM Content[] (for multimodal input)
|
|
290
|
+
const content = await readDocumentAsContent('/path/to/slides.pptx', {
|
|
291
|
+
imageFilter: { minWidth: 100, minHeight: 100 },
|
|
292
|
+
imageDetail: 'auto',
|
|
293
|
+
});
|
|
294
|
+
|
|
295
|
+
const response = await agent.run([
|
|
296
|
+
{ type: 'input_text', text: 'Analyze this document:' },
|
|
297
|
+
...content,
|
|
298
|
+
]);
|
|
299
|
+
```
|
|
300
|
+
|
|
301
|
+
**Supported Formats:**
|
|
302
|
+
- **Office**: DOCX, PPTX, ODT, ODP, ODS, RTF (via `officeparser`)
|
|
303
|
+
- **Spreadsheets**: XLSX, CSV (via `exceljs`)
|
|
304
|
+
- **PDF** (via `unpdf`)
|
|
305
|
+
- **HTML** (via Readability + Turndown)
|
|
306
|
+
- **Text**: TXT, MD, JSON, XML, YAML
|
|
307
|
+
- **Images**: PNG, JPG, GIF, WEBP, SVG (pass-through as base64)
|
|
308
|
+
|
|
257
309
|
### Web Search
|
|
258
310
|
|
|
259
311
|
Connector-based web search with multiple providers:
|
|
@@ -963,7 +1015,108 @@ await agent.run('Run npm test and report any failures');
|
|
|
963
1015
|
- Timeout protection (default 2 min)
|
|
964
1016
|
- Output truncation for large outputs
|
|
965
1017
|
|
|
966
|
-
### 16. External API Integration
|
|
1018
|
+
### 16. Desktop Automation Tools (NEW)
|
|
1019
|
+
|
|
1020
|
+
OS-level desktop automation for building "computer use" agents — screenshot the screen, send to a vision model, receive tool calls (click, type, etc.), execute them, repeat:
|
|
1021
|
+
|
|
1022
|
+
```typescript
|
|
1023
|
+
import { desktopTools } from '@everworker/oneringai';
|
|
1024
|
+
|
|
1025
|
+
const agent = Agent.create({
|
|
1026
|
+
connector: 'openai',
|
|
1027
|
+
model: 'gpt-4',
|
|
1028
|
+
tools: desktopTools, // All 11 desktop tools
|
|
1029
|
+
});
|
|
1030
|
+
|
|
1031
|
+
// Agent can now see and interact with the desktop:
|
|
1032
|
+
await agent.run('Take a screenshot and describe what you see');
|
|
1033
|
+
await agent.run('Open Safari and search for "weather forecast"');
|
|
1034
|
+
```
|
|
1035
|
+
|
|
1036
|
+
**Available Tools:**
|
|
1037
|
+
- **desktop_screenshot** - Capture full screen or region (returns image to vision model)
|
|
1038
|
+
- **desktop_mouse_move** - Move cursor to position
|
|
1039
|
+
- **desktop_mouse_click** - Click (left/right/middle, single/double/triple)
|
|
1040
|
+
- **desktop_mouse_drag** - Drag from one position to another
|
|
1041
|
+
- **desktop_mouse_scroll** - Scroll wheel (vertical and horizontal)
|
|
1042
|
+
- **desktop_get_cursor** - Get current cursor position
|
|
1043
|
+
- **desktop_keyboard_type** - Type text
|
|
1044
|
+
- **desktop_keyboard_key** - Press shortcuts (e.g., `ctrl+c`, `cmd+shift+s`, `enter`)
|
|
1045
|
+
- **desktop_get_screen_size** - Get screen dimensions and scale factor
|
|
1046
|
+
- **desktop_window_list** - List visible windows
|
|
1047
|
+
- **desktop_window_focus** - Bring a window to the foreground
|
|
1048
|
+
|
|
1049
|
+
**Key Design:**
|
|
1050
|
+
- All coordinates are in **physical pixel space** (same as screenshot pixels) — no manual Retina scaling needed
|
|
1051
|
+
- Screenshots use the `__images` convention for automatic multimodal handling across all providers (Anthropic, OpenAI, Google)
|
|
1052
|
+
- Requires `@nut-tree-fork/nut-js` as an optional peer dependency: `npm install @nut-tree-fork/nut-js`
|
|
1053
|
+
|
|
1054
|
+
### 17. Document Reader (NEW)
|
|
1055
|
+
|
|
1056
|
+
Universal file-to-LLM-content converter. Reads arbitrary document formats and produces clean markdown text with optional image extraction:
|
|
1057
|
+
|
|
1058
|
+
```typescript
|
|
1059
|
+
import { DocumentReader, mergeTextPieces } from '@everworker/oneringai';
|
|
1060
|
+
|
|
1061
|
+
const reader = DocumentReader.create({
|
|
1062
|
+
defaults: {
|
|
1063
|
+
maxTokens: 50_000,
|
|
1064
|
+
extractImages: true,
|
|
1065
|
+
imageFilter: { minWidth: 100, minHeight: 100 },
|
|
1066
|
+
},
|
|
1067
|
+
});
|
|
1068
|
+
|
|
1069
|
+
// Read from file path, URL, Buffer, or Blob
|
|
1070
|
+
const result = await reader.read('/path/to/report.pdf');
|
|
1071
|
+
const result = await reader.read('https://example.com/doc.xlsx');
|
|
1072
|
+
const result = await reader.read({ type: 'buffer', buffer: myBuffer, filename: 'doc.docx' });
|
|
1073
|
+
|
|
1074
|
+
// Get merged markdown text
|
|
1075
|
+
const markdown = mergeTextPieces(result.pieces);
|
|
1076
|
+
|
|
1077
|
+
// Metadata
|
|
1078
|
+
console.log(result.metadata.format); // 'pdf'
|
|
1079
|
+
console.log(result.metadata.estimatedTokens); // 12500
|
|
1080
|
+
console.log(result.metadata.processingTimeMs); // 234
|
|
1081
|
+
```
|
|
1082
|
+
|
|
1083
|
+
**Automatic Integration — No Code Changes Needed:**
|
|
1084
|
+
- **`read_file` tool** — Agents calling `read_file` on a PDF, DOCX, or XLSX get markdown text automatically
|
|
1085
|
+
- **`web_fetch` tool** — Documents downloaded from URLs are auto-converted to markdown
|
|
1086
|
+
|
|
1087
|
+
**Content Bridge for Multimodal Input:**
|
|
1088
|
+
|
|
1089
|
+
```typescript
|
|
1090
|
+
import { readDocumentAsContent } from '@everworker/oneringai';
|
|
1091
|
+
|
|
1092
|
+
// Convert document directly to Content[] for LLM input
|
|
1093
|
+
const content = await readDocumentAsContent('/path/to/slides.pptx', {
|
|
1094
|
+
extractImages: true,
|
|
1095
|
+
imageDetail: 'auto',
|
|
1096
|
+
maxImages: 20,
|
|
1097
|
+
});
|
|
1098
|
+
|
|
1099
|
+
// Use in agent.run() with text + images
|
|
1100
|
+
await agent.run([
|
|
1101
|
+
{ type: 'input_text', text: 'Analyze this presentation:' },
|
|
1102
|
+
...content,
|
|
1103
|
+
]);
|
|
1104
|
+
```
|
|
1105
|
+
|
|
1106
|
+
**Pluggable Architecture:**
|
|
1107
|
+
- 6 built-in format handlers (Office, Excel, PDF, HTML, Text, Image)
|
|
1108
|
+
- 3 default transformers (header, table formatting, truncation)
|
|
1109
|
+
- Custom handlers and transformers via `DocumentReader.create({ handlers, ... })`
|
|
1110
|
+
- All heavy dependencies lazy-loaded (officeparser, exceljs, unpdf)
|
|
1111
|
+
|
|
1112
|
+
**Image Filtering:**
|
|
1113
|
+
- Configurable min dimensions, min size, max count, pattern exclusions
|
|
1114
|
+
- Automatically removes junk images (logos, icons, tiny backgrounds)
|
|
1115
|
+
- Applied both at extraction time and at content conversion time
|
|
1116
|
+
|
|
1117
|
+
See the [User Guide](./USER_GUIDE.md#document-reader) for complete API reference and configuration options.
|
|
1118
|
+
|
|
1119
|
+
### 18. External API Integration
|
|
967
1120
|
|
|
968
1121
|
Connect your AI agents to 35+ external services with enterprise-grade resilience:
|
|
969
1122
|
|
|
@@ -1315,4 +1468,4 @@ MIT License - See [LICENSE](./LICENSE) file.
|
|
|
1315
1468
|
|
|
1316
1469
|
---
|
|
1317
1470
|
|
|
1318
|
-
**Version:** 0.2.0
|
|
1471
|
+
**Version:** 0.2.1 | **Last Updated:** 2026-02-11 | **[User Guide](./USER_GUIDE.md)** | **[API Reference](./API_REFERENCE.md)** | **[Changelog](./CHANGELOG.md)**
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
export { aD as AfterToolContext, av as AgentEventName, A as AgentEvents, ay as AgenticLoopEventName, ax as AgenticLoopEvents, aG as ApprovalResult, aE as ApproveToolContext, m as AuditEntry, aC as BeforeToolContext, aI as ExecutionCompleteEvent, aw as ExecutionConfig, E as ExecutionContext, l as ExecutionMetrics, aH as ExecutionStartEvent, j as HistoryMode, aA as Hook, H as HookConfig, au as HookManager, az as HookName, aL as LLMRequestEvent, aM as LLMResponseEvent, aB as ModifyingHook, aK as ToolCompleteEvent, aF as ToolModification, aJ as ToolStartEvent } from '../../index-MJ14lkui.cjs';
|
|
1
|
+
export { aD as AfterToolContext, av as AgentEventName, A as AgentEvents, ay as AgenticLoopEventName, ax as AgenticLoopEvents, aG as ApprovalResult, aE as ApproveToolContext, m as AuditEntry, aC as BeforeToolContext, aI as ExecutionCompleteEvent, aw as ExecutionConfig, E as ExecutionContext, l as ExecutionMetrics, aH as ExecutionStartEvent, j as HistoryMode, aA as Hook, H as HookConfig, au as HookManager, az as HookName, aL as LLMRequestEvent, aM as LLMResponseEvent, aB as ModifyingHook, aK as ToolCompleteEvent, aF as ToolModification, aJ as ToolStartEvent } from '../../index-D62LXWdW.cjs';
|
|
2
2
|
import '../../IProvider-c4QCbPjn.cjs';
|
|
3
3
|
import '../../Vendor-DYh_bzwo.cjs';
|
|
4
4
|
import 'eventemitter3';
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
export { aD as AfterToolContext, av as AgentEventName, A as AgentEvents, ay as AgenticLoopEventName, ax as AgenticLoopEvents, aG as ApprovalResult, aE as ApproveToolContext, m as AuditEntry, aC as BeforeToolContext, aI as ExecutionCompleteEvent, aw as ExecutionConfig, E as ExecutionContext, l as ExecutionMetrics, aH as ExecutionStartEvent, j as HistoryMode, aA as Hook, H as HookConfig, au as HookManager, az as HookName, aL as LLMRequestEvent, aM as LLMResponseEvent, aB as ModifyingHook, aK as ToolCompleteEvent, aF as ToolModification, aJ as ToolStartEvent } from '../../index-B5UaeEvK.js';
|
|
1
|
+
export { aD as AfterToolContext, av as AgentEventName, A as AgentEvents, ay as AgenticLoopEventName, ax as AgenticLoopEvents, aG as ApprovalResult, aE as ApproveToolContext, m as AuditEntry, aC as BeforeToolContext, aI as ExecutionCompleteEvent, aw as ExecutionConfig, E as ExecutionContext, l as ExecutionMetrics, aH as ExecutionStartEvent, j as HistoryMode, aA as Hook, H as HookConfig, au as HookManager, az as HookName, aL as LLMRequestEvent, aM as LLMResponseEvent, aB as ModifyingHook, aK as ToolCompleteEvent, aF as ToolModification, aJ as ToolStartEvent } from '../../index-DVb6vfA3.js';
|
|
2
2
|
import '../../IProvider-DcYJ3YE-.js';
|
|
3
3
|
import '../../Vendor-DYh_bzwo.js';
|
|
4
4
|
import 'eventemitter3';
|