@everworker/oneringai 0.2.0 → 0.2.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -27,7 +27,8 @@
27
27
  - [13. Streaming](#13-streaming)
28
28
  - [14. OAuth for External APIs](#14-oauth-for-external-apis)
29
29
  - [15. Developer Tools](#15-developer-tools)
30
- - [16. External API Integration](#16-external-api-integration) — Scoped Registry, Vendor Templates, Tool Discovery
30
+ - [17. Document Reader](#17-document-reader-new) — PDF, DOCX, XLSX, PPTX, CSV, HTML, images
31
+ - [18. External API Integration](#18-external-api-integration) — Scoped Registry, Vendor Templates, Tool Discovery
31
32
  - [MCP Integration](#mcp-model-context-protocol-integration)
32
33
  - [Documentation](#documentation)
33
34
  - [Examples](#examples)
@@ -54,6 +55,15 @@
54
55
 
55
56
  ---
56
57
 
58
+ ## HOSEA APP
59
+ We realize that a library alone is not enough to get you excited these days, so we built a FREE FOREVER desktop app on top of this library to showcase its power! Getting started is as easy as cloning this library's repo, then running `cd apps/hosea`, `npm install`, and `npm run dev`. Or watch the video first:
60
+
61
+ [![Watch the demo](https://img.youtube.com/vi/_LzDiuOQD8Y/maxresdefault.jpg)](https://www.youtube.com/watch?v=_LzDiuOQD8Y)
62
+
63
+ Or read the more detailed installation / setup instructions [here](https://github.com/Integrail/oneringai/blob/main/apps/hosea/README.md)
64
+
65
+ Better to see it once and then dig into the code! :)
66
+
57
67
  ## Features
58
68
 
59
69
  - ✨ **Unified API** - One interface for 10+ AI providers (OpenAI, Anthropic, Google, Groq, DeepSeek, and more)
@@ -77,6 +87,8 @@
77
87
  - 📝 **Persistent Instructions** - NEW: Agent-level custom instructions that persist across sessions on disk
78
88
  - 🛠️ **Agentic Workflows** - Built-in tool calling and multi-turn conversations
79
89
  - 🔧 **Developer Tools** - NEW: Filesystem and shell tools for coding assistants (read, write, edit, grep, glob, bash)
90
+ - 🖥️ **Desktop Automation** - NEW: OS-level computer use — screenshot, mouse, keyboard, and window control for vision-driven agent loops
91
+ - 📄 **Document Reader** - NEW: Universal file-to-text converter — PDF, DOCX, XLSX, PPTX, CSV, HTML, images auto-converted to markdown
80
92
  - 🔌 **MCP Integration** - NEW: Model Context Protocol client for seamless tool discovery from local and remote servers
81
93
  - 👁️ **Vision Support** - Analyze images with AI across all providers
82
94
  - 📋 **Clipboard Integration** - Paste screenshots directly (like Claude Code!)
@@ -254,6 +266,55 @@ const veoJob = await googleVideo.generate({
254
266
  });
255
267
  ```
256
268
 
269
+ ### Document Reader (NEW)
270
+
271
+ Read any document format — agents automatically get markdown text from PDFs, Word docs, spreadsheets, and more:
272
+
273
+ ```typescript
274
+ import { Agent, developerTools } from '@everworker/oneringai';
275
+
276
+ const agent = Agent.create({
277
+ connector: 'openai',
278
+ model: 'gpt-4',
279
+ tools: developerTools,
280
+ });
281
+
282
+ // read_file auto-converts binary documents to markdown
283
+ await agent.run('Read /path/to/report.pdf and summarize the key findings');
284
+ await agent.run('Read /path/to/data.xlsx and describe the trends');
285
+ await agent.run('Read /path/to/presentation.pptx and list all slides');
286
+ ```
287
+
288
+ **Programmatic usage:**
289
+
290
+ ```typescript
291
+ import { DocumentReader, readDocumentAsContent } from '@everworker/oneringai';
292
+
293
+ // Read any file to markdown pieces
294
+ const reader = DocumentReader.create();
295
+ const result = await reader.read('/path/to/report.pdf');
296
+ console.log(result.pieces); // DocumentPiece[] (text + images)
297
+
298
+ // One-call conversion to LLM Content[] (for multimodal input)
299
+ const content = await readDocumentAsContent('/path/to/slides.pptx', {
300
+ imageFilter: { minWidth: 100, minHeight: 100 },
301
+ imageDetail: 'auto',
302
+ });
303
+
304
+ const response = await agent.run([
305
+ { type: 'input_text', text: 'Analyze this document:' },
306
+ ...content,
307
+ ]);
308
+ ```
309
+
310
+ **Supported Formats:**
311
+ - **Office**: DOCX, PPTX, ODT, ODP, ODS, RTF (via `officeparser`)
312
+ - **Spreadsheets**: XLSX, CSV (via `exceljs`)
313
+ - **PDF** (via `unpdf`)
314
+ - **HTML** (via Readability + Turndown)
315
+ - **Text**: TXT, MD, JSON, XML, YAML
316
+ - **Images**: PNG, JPG, GIF, WEBP, SVG (pass-through as base64)
317
+
257
318
  ### Web Search
258
319
 
259
320
  Connector-based web search with multiple providers:
@@ -963,7 +1024,108 @@ await agent.run('Run npm test and report any failures');
963
1024
  - Timeout protection (default 2 min)
964
1025
  - Output truncation for large outputs
965
1026
 
966
- ### 16. External API Integration
1027
+ ### 16. Desktop Automation Tools (NEW)
1028
+
1029
+ OS-level desktop automation for building "computer use" agents — screenshot the screen, send to a vision model, receive tool calls (click, type, etc.), execute them, repeat:
1030
+
1031
+ ```typescript
1032
+ import { desktopTools } from '@everworker/oneringai';
1033
+
1034
+ const agent = Agent.create({
1035
+ connector: 'openai',
1036
+ model: 'gpt-4',
1037
+ tools: desktopTools, // All 11 desktop tools
1038
+ });
1039
+
1040
+ // Agent can now see and interact with the desktop:
1041
+ await agent.run('Take a screenshot and describe what you see');
1042
+ await agent.run('Open Safari and search for "weather forecast"');
1043
+ ```
1044
+
1045
+ **Available Tools:**
1046
+ - **desktop_screenshot** - Capture full screen or region (returns image to vision model)
1047
+ - **desktop_mouse_move** - Move cursor to position
1048
+ - **desktop_mouse_click** - Click (left/right/middle, single/double/triple)
1049
+ - **desktop_mouse_drag** - Drag from one position to another
1050
+ - **desktop_mouse_scroll** - Scroll wheel (vertical and horizontal)
1051
+ - **desktop_get_cursor** - Get current cursor position
1052
+ - **desktop_keyboard_type** - Type text
1053
+ - **desktop_keyboard_key** - Press shortcuts (e.g., `ctrl+c`, `cmd+shift+s`, `enter`)
1054
+ - **desktop_get_screen_size** - Get screen dimensions and scale factor
1055
+ - **desktop_window_list** - List visible windows
1056
+ - **desktop_window_focus** - Bring a window to the foreground
1057
+
1058
+ **Key Design:**
1059
+ - All coordinates are in **physical pixel space** (same as screenshot pixels) — no manual Retina scaling needed
1060
+ - Screenshots use the `__images` convention for automatic multimodal handling across all providers (Anthropic, OpenAI, Google)
1061
+ - Requires `@nut-tree-fork/nut-js` as an optional peer dependency: `npm install @nut-tree-fork/nut-js`
1062
+
1063
+ ### 17. Document Reader (NEW)
1064
+
1065
+ Universal file-to-LLM-content converter. Reads arbitrary document formats and produces clean markdown text with optional image extraction:
1066
+
1067
+ ```typescript
1068
+ import { DocumentReader, mergeTextPieces } from '@everworker/oneringai';
1069
+
1070
+ const reader = DocumentReader.create({
1071
+ defaults: {
1072
+ maxTokens: 50_000,
1073
+ extractImages: true,
1074
+ imageFilter: { minWidth: 100, minHeight: 100 },
1075
+ },
1076
+ });
1077
+
1078
+ // Read from file path, URL, Buffer, or Blob
1079
+ const result = await reader.read('/path/to/report.pdf');
1080
+ const result = await reader.read('https://example.com/doc.xlsx');
1081
+ const result = await reader.read({ type: 'buffer', buffer: myBuffer, filename: 'doc.docx' });
1082
+
1083
+ // Get merged markdown text
1084
+ const markdown = mergeTextPieces(result.pieces);
1085
+
1086
+ // Metadata
1087
+ console.log(result.metadata.format); // 'pdf'
1088
+ console.log(result.metadata.estimatedTokens); // 12500
1089
+ console.log(result.metadata.processingTimeMs); // 234
1090
+ ```
1091
+
1092
+ **Automatic Integration — No Code Changes Needed:**
1093
+ - **`read_file` tool** — Agents calling `read_file` on a PDF, DOCX, or XLSX get markdown text automatically
1094
+ - **`web_fetch` tool** — Documents downloaded from URLs are auto-converted to markdown
1095
+
1096
+ **Content Bridge for Multimodal Input:**
1097
+
1098
+ ```typescript
1099
+ import { readDocumentAsContent } from '@everworker/oneringai';
1100
+
1101
+ // Convert document directly to Content[] for LLM input
1102
+ const content = await readDocumentAsContent('/path/to/slides.pptx', {
1103
+ extractImages: true,
1104
+ imageDetail: 'auto',
1105
+ maxImages: 20,
1106
+ });
1107
+
1108
+ // Use in agent.run() with text + images
1109
+ await agent.run([
1110
+ { type: 'input_text', text: 'Analyze this presentation:' },
1111
+ ...content,
1112
+ ]);
1113
+ ```
1114
+
1115
+ **Pluggable Architecture:**
1116
+ - 6 built-in format handlers (Office, Excel, PDF, HTML, Text, Image)
1117
+ - 3 default transformers (header, table formatting, truncation)
1118
+ - Custom handlers and transformers via `DocumentReader.create({ handlers, ... })`
1119
+ - All heavy dependencies lazy-loaded (officeparser, exceljs, unpdf)
1120
+
1121
+ **Image Filtering:**
1122
+ - Configurable min dimensions, min size, max count, pattern exclusions
1123
+ - Automatically removes junk images (logos, icons, tiny backgrounds)
1124
+ - Applied both at extraction time and at content conversion time
1125
+
1126
+ See the [User Guide](./USER_GUIDE.md#document-reader) for complete API reference and configuration options.
1127
+
1128
+ ### 18. External API Integration
967
1129
 
968
1130
  Connect your AI agents to 35+ external services with enterprise-grade resilience:
969
1131
 
@@ -1315,4 +1477,4 @@ MIT License - See [LICENSE](./LICENSE) file.
1315
1477
 
1316
1478
  ---
1317
1479
 
1318
- **Version:** 0.2.0 | **Last Updated:** 2026-02-09 | **[User Guide](./USER_GUIDE.md)** | **[API Reference](./API_REFERENCE.md)** | **[Changelog](./CHANGELOG.md)**
1480
+ **Version:** 0.2.1 | **Last Updated:** 2026-02-11 | **[User Guide](./USER_GUIDE.md)** | **[API Reference](./API_REFERENCE.md)** | **[Changelog](./CHANGELOG.md)**
@@ -1,4 +1,4 @@
1
- export { aD as AfterToolContext, av as AgentEventName, A as AgentEvents, ay as AgenticLoopEventName, ax as AgenticLoopEvents, aG as ApprovalResult, aE as ApproveToolContext, m as AuditEntry, aC as BeforeToolContext, aI as ExecutionCompleteEvent, aw as ExecutionConfig, E as ExecutionContext, l as ExecutionMetrics, aH as ExecutionStartEvent, j as HistoryMode, aA as Hook, H as HookConfig, au as HookManager, az as HookName, aL as LLMRequestEvent, aM as LLMResponseEvent, aB as ModifyingHook, aK as ToolCompleteEvent, aF as ToolModification, aJ as ToolStartEvent } from '../../index-MJ14lkui.cjs';
1
+ export { aD as AfterToolContext, av as AgentEventName, A as AgentEvents, ay as AgenticLoopEventName, ax as AgenticLoopEvents, aG as ApprovalResult, aE as ApproveToolContext, m as AuditEntry, aC as BeforeToolContext, aI as ExecutionCompleteEvent, aw as ExecutionConfig, E as ExecutionContext, l as ExecutionMetrics, aH as ExecutionStartEvent, j as HistoryMode, aA as Hook, H as HookConfig, au as HookManager, az as HookName, aL as LLMRequestEvent, aM as LLMResponseEvent, aB as ModifyingHook, aK as ToolCompleteEvent, aF as ToolModification, aJ as ToolStartEvent } from '../../index-D62LXWdW.cjs';
2
2
  import '../../IProvider-c4QCbPjn.cjs';
3
3
  import '../../Vendor-DYh_bzwo.cjs';
4
4
  import 'eventemitter3';
@@ -1,4 +1,4 @@
1
- export { aD as AfterToolContext, av as AgentEventName, A as AgentEvents, ay as AgenticLoopEventName, ax as AgenticLoopEvents, aG as ApprovalResult, aE as ApproveToolContext, m as AuditEntry, aC as BeforeToolContext, aI as ExecutionCompleteEvent, aw as ExecutionConfig, E as ExecutionContext, l as ExecutionMetrics, aH as ExecutionStartEvent, j as HistoryMode, aA as Hook, H as HookConfig, au as HookManager, az as HookName, aL as LLMRequestEvent, aM as LLMResponseEvent, aB as ModifyingHook, aK as ToolCompleteEvent, aF as ToolModification, aJ as ToolStartEvent } from '../../index-B5UaeEvK.js';
1
+ export { aD as AfterToolContext, av as AgentEventName, A as AgentEvents, ay as AgenticLoopEventName, ax as AgenticLoopEvents, aG as ApprovalResult, aE as ApproveToolContext, m as AuditEntry, aC as BeforeToolContext, aI as ExecutionCompleteEvent, aw as ExecutionConfig, E as ExecutionContext, l as ExecutionMetrics, aH as ExecutionStartEvent, j as HistoryMode, aA as Hook, H as HookConfig, au as HookManager, az as HookName, aL as LLMRequestEvent, aM as LLMResponseEvent, aB as ModifyingHook, aK as ToolCompleteEvent, aF as ToolModification, aJ as ToolStartEvent } from '../../index-DVb6vfA3.js';
2
2
  import '../../IProvider-DcYJ3YE-.js';
3
3
  import '../../Vendor-DYh_bzwo.js';
4
4
  import 'eventemitter3';
@@ -342,14 +342,32 @@ var AuthCodePKCEFlow = class {
342
342
  if (this.config.usePKCE !== false && verifierData) {
343
343
  params.append("code_verifier", verifierData.verifier);
344
344
  }
345
- const response = await fetch(this.config.tokenUrl, {
345
+ let response = await fetch(this.config.tokenUrl, {
346
346
  method: "POST",
347
347
  headers: {
348
348
  "Content-Type": "application/x-www-form-urlencoded"
349
349
  },
350
350
  body: params
351
351
  });
352
- if (!response.ok) {
352
+ if (!response.ok && this.config.clientSecret) {
353
+ const errorText = await response.text();
354
+ if (isPublicClientError(errorText)) {
355
+ params.delete("client_secret");
356
+ response = await fetch(this.config.tokenUrl, {
357
+ method: "POST",
358
+ headers: {
359
+ "Content-Type": "application/x-www-form-urlencoded"
360
+ },
361
+ body: params
362
+ });
363
+ if (!response.ok) {
364
+ const retryError = await response.text();
365
+ throw new Error(`Token exchange failed: ${response.status} ${response.statusText} - ${retryError}`);
366
+ }
367
+ } else {
368
+ throw new Error(`Token exchange failed: ${response.status} ${response.statusText} - ${errorText}`);
369
+ }
370
+ } else if (!response.ok) {
353
371
  const error = await response.text();
354
372
  throw new Error(`Token exchange failed: ${response.status} ${response.statusText} - ${error}`);
355
373
  }
@@ -395,14 +413,32 @@ var AuthCodePKCEFlow = class {
395
413
  if (this.config.clientSecret) {
396
414
  params.append("client_secret", this.config.clientSecret);
397
415
  }
398
- const response = await fetch(this.config.tokenUrl, {
416
+ let response = await fetch(this.config.tokenUrl, {
399
417
  method: "POST",
400
418
  headers: {
401
419
  "Content-Type": "application/x-www-form-urlencoded"
402
420
  },
403
421
  body: params
404
422
  });
405
- if (!response.ok) {
423
+ if (!response.ok && this.config.clientSecret) {
424
+ const errorText = await response.text();
425
+ if (isPublicClientError(errorText)) {
426
+ params.delete("client_secret");
427
+ response = await fetch(this.config.tokenUrl, {
428
+ method: "POST",
429
+ headers: {
430
+ "Content-Type": "application/x-www-form-urlencoded"
431
+ },
432
+ body: params
433
+ });
434
+ if (!response.ok) {
435
+ const retryError = await response.text();
436
+ throw new Error(`Token refresh failed: ${response.status} ${response.statusText} - ${retryError}`);
437
+ }
438
+ } else {
439
+ throw new Error(`Token refresh failed: ${response.status} ${response.statusText} - ${errorText}`);
440
+ }
441
+ } else if (!response.ok) {
406
442
  const error = await response.text();
407
443
  throw new Error(`Token refresh failed: ${response.status} ${response.statusText} - ${error}`);
408
444
  }
@@ -457,6 +493,10 @@ var AuthCodePKCEFlow = class {
457
493
  }
458
494
  }
459
495
  };
496
/**
 * Reports whether a token-endpoint error body matches the "public client"
 * patterns this file checks for.
 *
 * The comparison is case-insensitive (the body is lower-cased first). A body
 * matches when it contains "aadsts700025", or when it contains both
 * "invalid_client" and "public" — `&&` binds tighter than `||`, so the
 * expression groups as `a || (b && c)`.
 *
 * The visible token exchange and token refresh paths call this with
 * `await response.text()` after a failed request made with a configured
 * client secret; on a match they delete `client_secret` from the form
 * parameters and retry the request once.
 *
 * NOTE(review): "aadsts700025" looks like the Microsoft Entra ID (Azure AD)
 * error code returned when a public client presents a secret — confirm.
 *
 * @param {string} responseBody - Raw error response text; must be a string,
 *   because `toLowerCase()` is called on it without a guard.
 * @returns {boolean} True when the body matches one of the patterns above.
 */
+ function isPublicClientError(responseBody) {
497
+ const lower = responseBody.toLowerCase();
498
+ return lower.includes("aadsts700025") || lower.includes("invalid_client") && lower.includes("public");
499
+ }
460
500
 
461
501
  // src/connectors/oauth/flows/ClientCredentials.ts
462
502
  var ClientCredentialsFlow = class {