@heyputer/puter.js 2.1.4 → 2.1.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/index.d.ts CHANGED
@@ -9,7 +9,7 @@ declare global {
     declare class Puter {
         // Properties
         appID: string;
-        env: 'app' | 'web' | 'gui';
+        env: 'app' | 'web' | 'gui' | 'nodejs' | 'service-worker';
 
         // Utility methods
         print(text: string, options?: { code?: boolean }): void;
@@ -47,6 +47,9 @@ interface AI {
     chat(messages: ChatMessage[], testMode?: boolean, options?: NonStreamingChatOptions): Promise<ChatResponse>;
 
     img2txt(image: string | File | Blob, testMode?: boolean): Promise<string>;
+    img2txt(image: string | File | Blob, options?: Img2TxtOptions): Promise<string>;
+    img2txt(image: string | File | Blob, testMode?: boolean, options?: Img2TxtOptions): Promise<string>;
+    img2txt(options: Img2TxtOptions): Promise<string>;
 
     txt2img(prompt: string, testMode?: boolean): Promise<HTMLImageElement>;
     txt2img(prompt: string, options?: Txt2ImgOptions): Promise<HTMLImageElement>;
@@ -75,6 +78,16 @@ interface ChatOptions {
     stream?: boolean;
     max_tokens?: number;
     temperature?: number;
+    reasoning?: {
+        effort?: 'none' | 'low' | 'medium' | 'high' | 'minimal';
+        [key: string]: unknown;
+    };
+    reasoning_effort?: 'none' | 'low' | 'medium' | 'high' | 'minimal';
+    text?: {
+        verbosity?: 'low' | 'medium' | 'high';
+        [key: string]: unknown;
+    };
+    verbosity?: 'low' | 'medium' | 'high';
     tools?: ToolDefinition[];
 }
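Both spellings of the new chat options are declared, so an options object can use either the nested fields or the flat aliases. A minimal sketch of an options object under the updated ChatOptions typing; the values shown are illustrative, not defaults:

    // Options object for puter.ai.chat() using the fields added above.
    const chatOptions = {
        reasoning: { effort: 'low' },     // nested form
        text: { verbosity: 'medium' },    // nested form
        // reasoning_effort: 'low',       // flat alias
        // verbosity: 'medium',           // flat alias
    };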
 
@@ -148,6 +161,19 @@ interface Txt2VidOptions {
     test_mode?: boolean;
 }
 
+interface Img2TxtOptions {
+    source?: string | File | Blob;
+    provider?: 'aws-textract' | 'mistral';
+    model?: string;
+    pages?: number[];
+    includeImageBase64?: boolean;
+    imageLimit?: number;
+    imageMinSize?: number;
+    bboxAnnotationFormat?: Record<string, unknown>;
+    documentAnnotationFormat?: Record<string, unknown>;
+    testMode?: boolean;
+}
+
 interface Txt2SpeechOptions {
     language?: string;
     voice?: string;
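With the overloads added to the AI interface and the new Img2TxtOptions shape, img2txt can be called with a single options object or with a source followed by options. A rough usage sketch under those typings; the URLs, page numbers, and provider choices below are placeholders, not defaults shipped by the package:

    // Single options-object form, selecting the Mistral OCR provider.
    const markdown = await puter.ai.img2txt({
        source: 'https://example.com/scan.pdf',   // placeholder URL
        provider: 'mistral',
        pages: [0, 1],
    });

    // Source-first form with a trailing options object.
    const text = await puter.ai.img2txt('https://example.com/receipt.png', {
        provider: 'aws-textract',
        testMode: true,
    });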
package/package.json CHANGED
@@ -1,6 +1,6 @@
 {
   "name": "@heyputer/puter.js",
-  "version": "2.1.4",
+  "version": "2.1.6",
   "description": "Puter.js - A JavaScript library for interacting with Puter services.",
   "main": "src/index.js",
   "types": "index.d.ts",
package/src/index.js CHANGED
@@ -785,6 +785,43 @@ export const puter = puterInit();
 export default puter;
 globalThis.puter = puter;
 
+puter.tools = [];
+/**
+ * @type {{messageTarget: Window}}
+ */
+const puterParent = puter.ui.parentApp();
+globalThis.puterParent = puterParent;
+if (puterParent) {
+    console.log("I have a parent, registering tools")
+    puterParent.on('message', async (event) => {
+        console.log("Got tool req ", event)
+        if (event.$ === "requestTools") {
+            console.log("Responding with tools")
+            puterParent.postMessage({
+                $: "providedTools",
+                tools: JSON.parse(JSON.stringify(puter.tools))
+            });
+        }
+
+        if (event.$ === "executeTool") {
+            console.log("xecuting tools")
+            /**
+             * Puter tools format
+             * @type {[{exec: Function, function: {description: string, name: string, parameters: {properties: any, required: Array<string>}, type: string}}]}
+             */
+            const [tool] = puter.tools.filter(e=>e.function.name === event.toolName);
+
+            const response = await tool.exec(event.parameters);
+            puterParent.postMessage({
+                $: "toolResponse",
+                response,
+                tag: event.tag,
+            });
+        }
+    });
+    puterParent.postMessage({$: "ready"});
+}
+
 globalThis.addEventListener && globalThis.addEventListener('message', async (event) => {
     // if the message is not from Puter, then ignore it
     if ( event.origin !== puter.defaultGUIOrigin ) return;
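The block added above treats puter.tools as a registry the parent window can list ("requestTools") and invoke ("executeTool") over postMessage; the expected entry shape is the one described in the JSDoc inside the executeTool branch. A sketch of what an embedded app might register, with a hypothetical tool name and handler:

    // Hypothetical tool entry. exec() is dropped by JSON.stringify when the list
    // is sent to the parent and is looked up again by function.name on executeTool.
    puter.tools.push({
        exec: async ({ city }) => `Weather in ${city}: sunny`,
        function: {
            name: 'get_weather',
            description: 'Return a one-line weather summary for a city.',
            type: 'function',
            parameters: {
                properties: { city: { type: 'string' } },
                required: ['city'],
            },
        },
    });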
package/src/modules/AI.js CHANGED
@@ -118,48 +118,100 @@ class AI{
     }
 
     img2txt = async (...args) => {
-        let MAX_INPUT_SIZE = 10 * 1024 * 1024;
+        const MAX_INPUT_SIZE = 10 * 1024 * 1024;
+        if (!args || args.length === 0) {
+            throw { message: 'Arguments are required', code: 'arguments_required' };
+        }
+
+        const isBlobLike = (value) => {
+            if (typeof Blob === 'undefined') return false;
+            return value instanceof Blob || (typeof File !== 'undefined' && value instanceof File);
+        };
+        const isPlainObject = (value) => value && typeof value === 'object' && !Array.isArray(value) && !isBlobLike(value);
+        const normalizeProvider = (value) => {
+            if (!value) return 'aws-textract';
+            const normalized = String(value).toLowerCase();
+            if (['aws', 'textract', 'aws-textract'].includes(normalized)) return 'aws-textract';
+            if (['mistral', 'mistral-ocr'].includes(normalized)) return 'mistral';
+            return 'aws-textract';
+        };
+
         let options = {};
+        if (isPlainObject(args[0])) {
+            options = { ...args[0] };
+        } else {
+            options.source = args[0];
+        }
+
         let testMode = false;
+        for (let i = 1; i < args.length; i++) {
+            const value = args[i];
+            if (typeof value === 'boolean') {
+                testMode = testMode || value;
+            } else if (isPlainObject(value)) {
+                options = { ...options, ...value };
+            }
+        }
 
-        // Check that the argument is not undefined or null
-        if(!args){
-            throw({message: 'Arguments are required', code: 'arguments_required'});
+        if (typeof options.testMode === 'boolean') {
+            testMode = options.testMode;
         }
 
-        // if argument is string transform it to the object that the API expects
-        if (typeof args[0] === 'string' || args[0] instanceof Blob) {
-            options.source = args[0];
+        const provider = normalizeProvider(options.provider);
+        delete options.provider;
+        delete options.testMode;
+
+        if (!options.source) {
+            throw { message: 'Source is required', code: 'source_required' };
         }
 
-        // if input is a blob, transform it to a data URI
-        if (args[0].source instanceof Blob) {
-            options.source = await utils.blobToDataUri(args[0].source);
+        if (isBlobLike(options.source)) {
+            options.source = await utils.blobToDataUri(options.source);
+        } else if (options.source?.source && isBlobLike(options.source.source)) {
+            // Support shape { source: Blob }
+            options.source = await utils.blobToDataUri(options.source.source);
         }
 
-        // check input size
-        if (options.source.length > this.MAX_INPUT_SIZE) {
+        if (typeof options.source === 'string' &&
+            options.source.startsWith('data:') &&
+            options.source.length > MAX_INPUT_SIZE) {
             throw { message: 'Input size cannot be larger than ' + MAX_INPUT_SIZE, code: 'input_too_large' };
         }
 
-        // determine if test mode is enabled
-        if (typeof args[1] === 'boolean' && args[1] === true ||
-            typeof args[2] === 'boolean' && args[2] === true ||
-            typeof args[3] === 'boolean' && args[3] === true) {
-            testMode = true;
-        }
-
-        return await utils.make_driver_method(['source'], 'puter-ocr', 'aws-textract', 'recognize', {
-            test_mode: testMode ?? false,
-            transform: async (result) => {
+        const toText = (result) => {
+            if (!result) return '';
+            if (Array.isArray(result.blocks) && result.blocks.length) {
                 let str = '';
-                for (let i = 0; i < result?.blocks?.length; i++) {
-                    if("text/textract:LINE" === result.blocks[i].type)
-                        str += result.blocks[i].text + "\n";
+                for (const block of result.blocks) {
+                    if (typeof block?.text !== 'string') continue;
+                    if (!block.type || block.type === 'text/textract:LINE' || block.type.startsWith('text/')) {
+                        str += block.text + '\n';
+                    }
                 }
-                return str;
+                if (str.trim()) return str;
             }
-        }).call(this, options);
+            if (Array.isArray(result.pages) && result.pages.length) {
+                const markdown = result.pages
+                    .map(page => (page?.markdown || '').trim())
+                    .filter(Boolean)
+                    .join('\n\n');
+                if (markdown.trim()) return markdown;
+            }
+            if (typeof result.document_annotation === 'string') {
+                return result.document_annotation;
+            }
+            if (typeof result.text === 'string') {
+                return result.text;
+            }
+            return '';
+        };
+
+        const driverCall = utils.make_driver_method(['source'], 'puter-ocr', provider, 'recognize', {
+            test_mode: testMode ?? false,
+            transform: async (result) => toText(result),
+        });
+
+        return await driverCall.call(this, options);
     }
 
     txt2speech = async (...args) => {
@@ -743,6 +795,9 @@ class AI{
         else if ( requestParams.model.startsWith('openrouter:') ) {
             driver = 'openrouter';
         }
+        else if ( requestParams.model.startsWith('ollama:') ) {
+            driver = 'ollama';
+        }
 
         // stream flag from userParams
         if(userParams.stream !== undefined && typeof userParams.stream === 'boolean'){
@@ -754,7 +809,7 @@ class AI{
         }
 
         // Additional parameters to pass from userParams to requestParams
-        const PARAMS_TO_PASS = ['tools', 'response'];
+        const PARAMS_TO_PASS = ['tools', 'response', 'reasoning', 'reasoning_effort', 'text', 'verbosity'];
         for ( const name of PARAMS_TO_PASS ) {
             if ( userParams[name] ) {
                 requestParams[name] = userParams[name];
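With 'ollama:' model ids routed to their own driver and the extra parameters now forwarded, a chat request can carry the new reasoning and verbosity hints end to end. A sketch of such a call using the messages-array overload from the typings; the model id and prompt are illustrative:

    // reasoning_effort and verbosity are copied from these options into the
    // driver request; the 'ollama:' prefix selects the ollama driver.
    const reply = await puter.ai.chat(
        [{ role: 'user', content: 'Give me one sentence about otters.' }],
        false,                          // testMode
        {
            model: 'ollama:llama3.2',   // illustrative model id
            reasoning_effort: 'low',
            verbosity: 'low',
        },
    );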