npm - @everworker/oneringai - Versions diffs - 0.2.0 → 0.2.2 - Mend

@everworker/oneringai 0.2.0 → 0.2.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (20) hide show

package/README.md +165 -3
package/dist/capabilities/agents/index.d.cts +1 -1
package/dist/capabilities/agents/index.d.ts +1 -1
package/dist/capabilities/images/index.cjs +44 -4
package/dist/capabilities/images/index.cjs.map +1 -1
package/dist/capabilities/images/index.js +44 -4
package/dist/capabilities/images/index.js.map +1 -1
package/dist/{index-MJ14lkui.d.cts → index-D62LXWdW.d.cts} +9 -0
package/dist/{index-B5UaeEvK.d.ts → index-DVb6vfA3.d.ts} +9 -0
package/dist/index.cjs +3199 -610
package/dist/index.cjs.map +1 -1
package/dist/index.d.cts +840 -9
package/dist/index.d.ts +840 -9
package/dist/index.js +3165 -611
package/dist/index.js.map +1 -1
package/dist/shared/index.cjs +17 -0
package/dist/shared/index.cjs.map +1 -1
package/dist/shared/index.js +17 -0
package/dist/shared/index.js.map +1 -1
package/package.json +14 -1

package/README.md CHANGED Viewed

@@ -27,7 +27,8 @@
   - [13. Streaming](#13-streaming)
   - [14. OAuth for External APIs](#14-oauth-for-external-apis)
   - [15. Developer Tools](#15-developer-tools)
-  - [16. External API Integration](#16-external-api-integration) — Scoped Registry, Vendor Templates, Tool Discovery
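+  - [16. Desktop Automation Tools](#16-desktop-automation-tools-new) — Screenshot, mouse, keyboard, and window control
+  - [17. Document Reader](#17-document-reader-new) — PDF, DOCX, XLSX, PPTX, CSV, HTML, images
+  - [18. External API Integration](#18-external-api-integration) — Scoped Registry, Vendor Templates, Tool Discovery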
 - [MCP Integration](#mcp-model-context-protocol-integration)
 - [Documentation](#documentation)
 - [Examples](#examples)
@@ -54,6 +55,15 @@
 ---
+## HOSEA APP
+We realize that a library alone is not enough to get you excited these days, so we built a FREE FOREVER desktop app on top of this library to showcase its power! Getting started is as easy as cloning this library's repo and then running `cd apps/hosea`, `npm install`, and `npm run dev`. Or watch the video first:
+[![Watch the demo](https://img.youtube.com/vi/_LzDiuOQD8Y/maxresdefault.jpg)](https://www.youtube.com/watch?v=_LzDiuOQD8Y)
+Or read the more detailed installation / setup instructions [here](https://github.com/Integrail/oneringai/blob/main/apps/hosea/README.md)
+Better to see it once and then dig into the code! :)
 ## Features
 - ✨ **Unified API** - One interface for 10+ AI providers (OpenAI, Anthropic, Google, Groq, DeepSeek, and more)
@@ -77,6 +87,8 @@
 - 📝 **Persistent Instructions** - NEW: Agent-level custom instructions that persist across sessions on disk
 - 🛠️ **Agentic Workflows** - Built-in tool calling and multi-turn conversations
 - 🔧 **Developer Tools** - NEW: Filesystem and shell tools for coding assistants (read, write, edit, grep, glob, bash)
+- 🖥️ **Desktop Automation** - NEW: OS-level computer use — screenshot, mouse, keyboard, and window control for vision-driven agent loops
+- 📄 **Document Reader** - NEW: Universal file-to-text converter — PDF, DOCX, XLSX, PPTX, CSV, HTML, images auto-converted to markdown
 - 🔌 **MCP Integration** - NEW: Model Context Protocol client for seamless tool discovery from local and remote servers
 - 👁️ **Vision Support** - Analyze images with AI across all providers
 - 📋 **Clipboard Integration** - Paste screenshots directly (like Claude Code!)
@@ -254,6 +266,55 @@ const veoJob = await googleVideo.generate({
 });
 ```
+### Document Reader (NEW)
+Read any document format — agents automatically get markdown text from PDFs, Word docs, spreadsheets, and more:
+```typescript
+import { Agent, developerTools } from '@everworker/oneringai';
+const agent = Agent.create({
+  connector: 'openai',
+  model: 'gpt-4',
+  tools: developerTools,
+});
+// read_file auto-converts binary documents to markdown
+await agent.run('Read /path/to/report.pdf and summarize the key findings');
+await agent.run('Read /path/to/data.xlsx and describe the trends');
+await agent.run('Read /path/to/presentation.pptx and list all slides');
+```
+**Programmatic usage:**
+```typescript
+import { DocumentReader, readDocumentAsContent } from '@everworker/oneringai';
+// Read any file to markdown pieces
+const reader = DocumentReader.create();
+const result = await reader.read('/path/to/report.pdf');
+console.log(result.pieces); // DocumentPiece[] (text + images)
+// One-call conversion to LLM Content[] (for multimodal input)
+const content = await readDocumentAsContent('/path/to/slides.pptx', {
+  imageFilter: { minWidth: 100, minHeight: 100 },
+  imageDetail: 'auto',
+});
+const response = await agent.run([
+  { type: 'input_text', text: 'Analyze this document:' },
+  ...content,
+]);
+```
+**Supported Formats:**
+- **Office**: DOCX, PPTX, ODT, ODP, ODS, RTF (via `officeparser`)
+- **Spreadsheets**: XLSX, CSV (via `exceljs`)
+- **PDF** (via `unpdf`)
+- **HTML** (via Readability + Turndown)
+- **Text**: TXT, MD, JSON, XML, YAML
+- **Images**: PNG, JPG, GIF, WEBP, SVG (pass-through as base64)
 ### Web Search
 Connector-based web search with multiple providers:
@@ -963,7 +1024,108 @@ await agent.run('Run npm test and report any failures');
 - Timeout protection (default 2 min)
 - Output truncation for large outputs
-### 16. External API Integration
+### 16. Desktop Automation Tools (NEW)
+OS-level desktop automation for building "computer use" agents — screenshot the screen, send to a vision model, receive tool calls (click, type, etc.), execute them, repeat:
+```typescript
+import { desktopTools } from '@everworker/oneringai';
+const agent = Agent.create({
+  connector: 'openai',
+  model: 'gpt-4',
+  tools: desktopTools, // All 11 desktop tools
+});
+// Agent can now see and interact with the desktop:
+await agent.run('Take a screenshot and describe what you see');
+await agent.run('Open Safari and search for "weather forecast"');
+```
+**Available Tools:**
+- **desktop_screenshot** - Capture full screen or region (returns image to vision model)
+- **desktop_mouse_move** - Move cursor to position
+- **desktop_mouse_click** - Click (left/right/middle, single/double/triple)
+- **desktop_mouse_drag** - Drag from one position to another
+- **desktop_mouse_scroll** - Scroll wheel (vertical and horizontal)
+- **desktop_get_cursor** - Get current cursor position
+- **desktop_keyboard_type** - Type text
+- **desktop_keyboard_key** - Press shortcuts (e.g., `ctrl+c`, `cmd+shift+s`, `enter`)
+- **desktop_get_screen_size** - Get screen dimensions and scale factor
+- **desktop_window_list** - List visible windows
+- **desktop_window_focus** - Bring a window to the foreground
+**Key Design:**
+- All coordinates are in **physical pixel space** (same as screenshot pixels) — no manual Retina scaling needed
+- Screenshots use the `__images` convention for automatic multimodal handling across all providers (Anthropic, OpenAI, Google)
+- Requires `@nut-tree-fork/nut-js` as an optional peer dependency: `npm install @nut-tree-fork/nut-js`
+### 17. Document Reader (NEW)
+Universal file-to-LLM-content converter. Reads arbitrary document formats and produces clean markdown text with optional image extraction:
+```typescript
+import { DocumentReader, mergeTextPieces } from '@everworker/oneringai';
+const reader = DocumentReader.create({
+  defaults: {
+    maxTokens: 50_000,
+    extractImages: true,
+    imageFilter: { minWidth: 100, minHeight: 100 },
+  },
+});
+// Read from file path, URL, Buffer, or Blob
+const result = await reader.read('/path/to/report.pdf');
+const result = await reader.read('https://example.com/doc.xlsx');
+const result = await reader.read({ type: 'buffer', buffer: myBuffer, filename: 'doc.docx' });
+// Get merged markdown text
+const markdown = mergeTextPieces(result.pieces);
+// Metadata
+console.log(result.metadata.format);          // 'pdf'
+console.log(result.metadata.estimatedTokens); // 12500
+console.log(result.metadata.processingTimeMs); // 234
+```
+**Automatic Integration — No Code Changes Needed:**
+- **`read_file` tool** — Agents calling `read_file` on a PDF, DOCX, or XLSX get markdown text automatically
+- **`web_fetch` tool** — Documents downloaded from URLs are auto-converted to markdown
+**Content Bridge for Multimodal Input:**
+```typescript
+import { readDocumentAsContent } from '@everworker/oneringai';
+// Convert document directly to Content[] for LLM input
+const content = await readDocumentAsContent('/path/to/slides.pptx', {
+  extractImages: true,
+  imageDetail: 'auto',
+  maxImages: 20,
+});
+// Use in agent.run() with text + images
+await agent.run([
+  { type: 'input_text', text: 'Analyze this presentation:' },
+  ...content,
+]);
+```
+**Pluggable Architecture:**
+- 6 built-in format handlers (Office, Excel, PDF, HTML, Text, Image)
+- 3 default transformers (header, table formatting, truncation)
+- Custom handlers and transformers via `DocumentReader.create({ handlers, ... })`
+- All heavy dependencies lazy-loaded (officeparser, exceljs, unpdf)
+**Image Filtering:**
+- Configurable min dimensions, min size, max count, pattern exclusions
+- Automatically removes junk images (logos, icons, tiny backgrounds)
+- Applied both at extraction time and at content conversion time
+See the [User Guide](./USER_GUIDE.md#document-reader) for complete API reference and configuration options.
+### 18. External API Integration
 Connect your AI agents to 35+ external services with enterprise-grade resilience:
@@ -1315,4 +1477,4 @@ MIT License - See [LICENSE](./LICENSE) file.
 ---
-**Version:** 0.2.0 | **Last Updated:** 2026-02-09 | **[User Guide](./USER_GUIDE.md)** | **[API Reference](./API_REFERENCE.md)** | **[Changelog](./CHANGELOG.md)**
+**Version:** 0.2.1 | **Last Updated:** 2026-02-11 | **[User Guide](./USER_GUIDE.md)** | **[API Reference](./API_REFERENCE.md)** | **[Changelog](./CHANGELOG.md)**

package/dist/capabilities/agents/index.d.cts CHANGED Viewed

@@ -1,4 +1,4 @@
-export { aD as AfterToolContext, av as AgentEventName, A as AgentEvents, ay as AgenticLoopEventName, ax as AgenticLoopEvents, aG as ApprovalResult, aE as ApproveToolContext, m as AuditEntry, aC as BeforeToolContext, aI as ExecutionCompleteEvent, aw as ExecutionConfig, E as ExecutionContext, l as ExecutionMetrics, aH as ExecutionStartEvent, j as HistoryMode, aA as Hook, H as HookConfig, au as HookManager, az as HookName, aL as LLMRequestEvent, aM as LLMResponseEvent, aB as ModifyingHook, aK as ToolCompleteEvent, aF as ToolModification, aJ as ToolStartEvent } from '../../index-MJ14lkui.cjs';
+export { aD as AfterToolContext, av as AgentEventName, A as AgentEvents, ay as AgenticLoopEventName, ax as AgenticLoopEvents, aG as ApprovalResult, aE as ApproveToolContext, m as AuditEntry, aC as BeforeToolContext, aI as ExecutionCompleteEvent, aw as ExecutionConfig, E as ExecutionContext, l as ExecutionMetrics, aH as ExecutionStartEvent, j as HistoryMode, aA as Hook, H as HookConfig, au as HookManager, az as HookName, aL as LLMRequestEvent, aM as LLMResponseEvent, aB as ModifyingHook, aK as ToolCompleteEvent, aF as ToolModification, aJ as ToolStartEvent } from '../../index-D62LXWdW.cjs';
 import '../../IProvider-c4QCbPjn.cjs';
 import '../../Vendor-DYh_bzwo.cjs';
 import 'eventemitter3';

package/dist/capabilities/agents/index.d.ts CHANGED Viewed

@@ -1,4 +1,4 @@
-export { aD as AfterToolContext, av as AgentEventName, A as AgentEvents, ay as AgenticLoopEventName, ax as AgenticLoopEvents, aG as ApprovalResult, aE as ApproveToolContext, m as AuditEntry, aC as BeforeToolContext, aI as ExecutionCompleteEvent, aw as ExecutionConfig, E as ExecutionContext, l as ExecutionMetrics, aH as ExecutionStartEvent, j as HistoryMode, aA as Hook, H as HookConfig, au as HookManager, az as HookName, aL as LLMRequestEvent, aM as LLMResponseEvent, aB as ModifyingHook, aK as ToolCompleteEvent, aF as ToolModification, aJ as ToolStartEvent } from '../../index-B5UaeEvK.js';
+export { aD as AfterToolContext, av as AgentEventName, A as AgentEvents, ay as AgenticLoopEventName, ax as AgenticLoopEvents, aG as ApprovalResult, aE as ApproveToolContext, m as AuditEntry, aC as BeforeToolContext, aI as ExecutionCompleteEvent, aw as ExecutionConfig, E as ExecutionContext, l as ExecutionMetrics, aH as ExecutionStartEvent, j as HistoryMode, aA as Hook, H as HookConfig, au as HookManager, az as HookName, aL as LLMRequestEvent, aM as LLMResponseEvent, aB as ModifyingHook, aK as ToolCompleteEvent, aF as ToolModification, aJ as ToolStartEvent } from '../../index-DVb6vfA3.js';
 import '../../IProvider-DcYJ3YE-.js';
 import '../../Vendor-DYh_bzwo.js';
 import 'eventemitter3';

package/dist/capabilities/images/index.cjs CHANGED Viewed

@@ -342,14 +342,32 @@ var AuthCodePKCEFlow = class {
     if (this.config.usePKCE !== false && verifierData) {
       params.append("code_verifier", verifierData.verifier);
     }
-    const response = await fetch(this.config.tokenUrl, {
+    let response = await fetch(this.config.tokenUrl, {
       method: "POST",
       headers: {
         "Content-Type": "application/x-www-form-urlencoded"
       },
       body: params
     });
-    if (!response.ok) {
+    if (!response.ok && this.config.clientSecret) {
+      const errorText = await response.text();
+      if (isPublicClientError(errorText)) {
+        params.delete("client_secret");
+        response = await fetch(this.config.tokenUrl, {
+          method: "POST",
+          headers: {
+            "Content-Type": "application/x-www-form-urlencoded"
+          },
+          body: params
+        });
+        if (!response.ok) {
+          const retryError = await response.text();
+          throw new Error(`Token exchange failed: ${response.status} ${response.statusText} - ${retryError}`);
+        }
+      } else {
+        throw new Error(`Token exchange failed: ${response.status} ${response.statusText} - ${errorText}`);
+      }
+    } else if (!response.ok) {
       const error = await response.text();
       throw new Error(`Token exchange failed: ${response.status} ${response.statusText} - ${error}`);
     }
@@ -395,14 +413,32 @@ var AuthCodePKCEFlow = class {
     if (this.config.clientSecret) {
       params.append("client_secret", this.config.clientSecret);
     }
-    const response = await fetch(this.config.tokenUrl, {
+    let response = await fetch(this.config.tokenUrl, {
       method: "POST",
       headers: {
         "Content-Type": "application/x-www-form-urlencoded"
       },
       body: params
     });
-    if (!response.ok) {
+    if (!response.ok && this.config.clientSecret) {
+      const errorText = await response.text();
+      if (isPublicClientError(errorText)) {
+        params.delete("client_secret");
+        response = await fetch(this.config.tokenUrl, {
+          method: "POST",
+          headers: {
+            "Content-Type": "application/x-www-form-urlencoded"
+          },
+          body: params
+        });
+        if (!response.ok) {
+          const retryError = await response.text();
+          throw new Error(`Token refresh failed: ${response.status} ${response.statusText} - ${retryError}`);
+        }
+      } else {
+        throw new Error(`Token refresh failed: ${response.status} ${response.statusText} - ${errorText}`);
+      }
+    } else if (!response.ok) {
       const error = await response.text();
       throw new Error(`Token refresh failed: ${response.status} ${response.statusText} - ${error}`);
     }
@@ -457,6 +493,10 @@ var AuthCodePKCEFlow = class {
     }
   }
 };
+function isPublicClientError(responseBody) {
+  const lower = responseBody.toLowerCase();
+  return lower.includes("aadsts700025") || lower.includes("invalid_client") && lower.includes("public");
+}
 // src/connectors/oauth/flows/ClientCredentials.ts
 var ClientCredentialsFlow = class {