@midscene/core 1.4.6 → 1.4.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (71) hide show
  1. package/dist/es/agent/agent.mjs +4 -56
  2. package/dist/es/agent/agent.mjs.map +1 -1
  3. package/dist/es/agent/task-builder.mjs +4 -1
  4. package/dist/es/agent/task-builder.mjs.map +1 -1
  5. package/dist/es/agent/utils.mjs +91 -18
  6. package/dist/es/agent/utils.mjs.map +1 -1
  7. package/dist/es/ai-model/auto-glm/planning.mjs +1 -1
  8. package/dist/es/ai-model/auto-glm/planning.mjs.map +1 -1
  9. package/dist/es/ai-model/inspect.mjs +5 -5
  10. package/dist/es/ai-model/inspect.mjs.map +1 -1
  11. package/dist/es/ai-model/llm-planning.mjs +3 -3
  12. package/dist/es/ai-model/llm-planning.mjs.map +1 -1
  13. package/dist/es/ai-model/service-caller/index.mjs +71 -45
  14. package/dist/es/ai-model/service-caller/index.mjs.map +1 -1
  15. package/dist/es/ai-model/ui-tars-planning.mjs +14 -14
  16. package/dist/es/ai-model/ui-tars-planning.mjs.map +1 -1
  17. package/dist/es/common.mjs +1 -14
  18. package/dist/es/common.mjs.map +1 -1
  19. package/dist/es/device/index.mjs.map +1 -1
  20. package/dist/es/dump/html-utils.mjs +2 -1
  21. package/dist/es/dump/html-utils.mjs.map +1 -1
  22. package/dist/es/index.mjs.map +1 -1
  23. package/dist/es/report-generator.mjs +2 -2
  24. package/dist/es/report-generator.mjs.map +1 -1
  25. package/dist/es/report.mjs +39 -7
  26. package/dist/es/report.mjs.map +1 -1
  27. package/dist/es/service/index.mjs +6 -6
  28. package/dist/es/service/index.mjs.map +1 -1
  29. package/dist/es/types.mjs.map +1 -1
  30. package/dist/es/utils.mjs +2 -2
  31. package/dist/lib/agent/agent.js +4 -56
  32. package/dist/lib/agent/agent.js.map +1 -1
  33. package/dist/lib/agent/task-builder.js +4 -1
  34. package/dist/lib/agent/task-builder.js.map +1 -1
  35. package/dist/lib/agent/utils.js +96 -14
  36. package/dist/lib/agent/utils.js.map +1 -1
  37. package/dist/lib/ai-model/auto-glm/planning.js +1 -1
  38. package/dist/lib/ai-model/auto-glm/planning.js.map +1 -1
  39. package/dist/lib/ai-model/inspect.js +5 -5
  40. package/dist/lib/ai-model/inspect.js.map +1 -1
  41. package/dist/lib/ai-model/llm-planning.js +3 -3
  42. package/dist/lib/ai-model/llm-planning.js.map +1 -1
  43. package/dist/lib/ai-model/service-caller/index.js +73 -47
  44. package/dist/lib/ai-model/service-caller/index.js.map +1 -1
  45. package/dist/lib/ai-model/ui-tars-planning.js +14 -14
  46. package/dist/lib/ai-model/ui-tars-planning.js.map +1 -1
  47. package/dist/lib/common.js +1 -14
  48. package/dist/lib/common.js.map +1 -1
  49. package/dist/lib/device/index.js.map +1 -1
  50. package/dist/lib/dump/html-utils.js +4 -0
  51. package/dist/lib/dump/html-utils.js.map +1 -1
  52. package/dist/lib/index.js.map +1 -1
  53. package/dist/lib/report-generator.js +1 -1
  54. package/dist/lib/report-generator.js.map +1 -1
  55. package/dist/lib/report.js +36 -4
  56. package/dist/lib/report.js.map +1 -1
  57. package/dist/lib/service/index.js +6 -6
  58. package/dist/lib/service/index.js.map +1 -1
  59. package/dist/lib/types.js.map +1 -1
  60. package/dist/lib/utils.js +2 -2
  61. package/dist/types/agent/agent.d.ts +1 -15
  62. package/dist/types/agent/utils.d.ts +13 -1
  63. package/dist/types/ai-model/index.d.ts +1 -1
  64. package/dist/types/ai-model/service-caller/index.d.ts +4 -2
  65. package/dist/types/common.d.ts +0 -310
  66. package/dist/types/device/index.d.ts +1 -2
  67. package/dist/types/dump/html-utils.d.ts +11 -0
  68. package/dist/types/index.d.ts +1 -1
  69. package/dist/types/report.d.ts +5 -0
  70. package/dist/types/types.d.ts +2 -2
  71. package/package.json +2 -2
@@ -41,14 +41,6 @@ export declare class Agent<InterfaceType extends AbstractInterface = AbstractInt
41
41
  * Flag to track if VL model warning has been shown
42
42
  */
43
43
  private hasWarnedNonVLModel;
44
- /**
45
- * Screenshot scale factor derived from actual screenshot dimensions
46
- */
47
- private screenshotScale?;
48
- /**
49
- * Internal promise to deduplicate screenshot scale computation
50
- */
51
- private screenshotScalePromise?;
52
44
  private executionDumpIndexByRunner;
53
45
  private fullActionSpace;
54
46
  private reportGenerator;
@@ -57,10 +49,6 @@ export declare class Agent<InterfaceType extends AbstractInterface = AbstractInt
57
49
  * Ensures VL model warning is shown once when needed
58
50
  */
59
51
  private ensureVLModelWarning;
60
- /**
61
- * Lazily compute the ratio between the physical screenshot width and the logical page width
62
- */
63
- private getScreenshotScale;
64
52
  private resolveReplanningCycleLimit;
65
53
  constructor(interfaceInstance: InterfaceType, opts?: AgentOpt);
66
54
  getActionSpace(): Promise<DeviceAction[]>;
@@ -132,9 +120,7 @@ export declare class Agent<InterfaceType extends AbstractInterface = AbstractInt
132
120
  deepThink?: boolean;
133
121
  } & LocatorValidatorOption): Promise<AgentDescribeElementAtPointResult>;
134
122
  verifyLocator(prompt: string, locateOpt: LocateOption | undefined, expectCenter: [number, number], verifyLocateOption?: LocatorValidatorOption): Promise<LocateValidatorResult>;
135
- aiLocate(prompt: TUserPrompt, opt?: LocateOption): Promise<Pick<LocateResultElement, "center" | "rect"> & {
136
- dpr?: number;
137
- }>;
123
+ aiLocate(prompt: TUserPrompt, opt?: LocateOption): Promise<Pick<LocateResultElement, "rect" | "center">>;
138
124
  aiAssert(assertion: TUserPrompt, msg?: string, opt?: AgentAssertOpt & ServiceExtractOption): Promise<{
139
125
  pass: boolean;
140
126
  thought: string | undefined;
@@ -1,6 +1,6 @@
1
1
  import type { TMultimodalPrompt, TUserPrompt } from '../common';
2
2
  import type { AbstractInterface } from '../device';
3
- import type { ElementCacheFeature, LocateResultElement, PlanningLocateParam, UIContext } from '../types';
3
+ import type { ElementCacheFeature, LocateResultElement, PlanningLocateParam, Rect, UIContext } from '../types';
4
4
  import type { TaskCache } from './task-cache';
5
5
  export declare function commonContextParser(interfaceInstance: AbstractInterface, _opt: {
6
6
  uploadServerUrl?: string;
@@ -24,3 +24,15 @@ export declare const parsePrompt: (prompt: TUserPrompt) => {
24
24
  textPrompt: string;
25
25
  multimodalPrompt?: TMultimodalPrompt;
26
26
  };
27
+ /**
28
+ * Transform coordinates from screenshot coordinate system to logical coordinate system.
29
+ * When shrunkShotToLogicalRatio > 1, the screenshot is larger than logical size,
30
+ * so we need to divide coordinates by shrunkShotToLogicalRatio.
31
+ *
32
+ * @param element - The locate result element with coordinates in screenshot space
33
+ * @param shrunkShotToLogicalRatio - The ratio of screenshot size to logical size
34
+ * @returns A new element with coordinates transformed to logical space
35
+ */
36
+ export declare const transformScreenshotElementToLogical: (element: LocateResultElement, shrunkShotToLogicalRatio: number) => LocateResultElement;
37
+ export declare const transformLogicalElementToScreenshot: (element: LocateResultElement, shrunkShotToLogicalRatio: number) => LocateResultElement;
38
+ export declare const transformLogicalRectToScreenshotRect: (rect: Rect, shrunkShotToLogicalRatio: number) => Rect;
@@ -11,4 +11,4 @@ export { uiTarsPlanning } from './ui-tars-planning';
11
11
  export { ConversationHistory, type ConversationHistoryOptions, } from './conversation-history';
12
12
  export type { SubGoal, SubGoalStatus } from '../types';
13
13
  export type { AIArgs } from '../common';
14
- export { getMidsceneLocationSchema, type MidsceneLocationResultType, PointSchema, SizeSchema, RectSchema, TMultimodalPromptSchema, TUserPromptSchema, type TMultimodalPrompt, type TUserPrompt, findAllMidsceneLocatorField, dumpActionParam, parseActionParam, } from '../common';
14
+ export { getMidsceneLocationSchema, PointSchema, SizeSchema, RectSchema, TMultimodalPromptSchema, TUserPromptSchema, type TMultimodalPrompt, type TUserPrompt, findAllMidsceneLocatorField, dumpActionParam, parseActionParam, } from '../common';
@@ -32,8 +32,10 @@ export declare function callAIWithStringResponse(msgs: AIArgs, modelConfig: IMod
32
32
  }>;
33
33
  export declare function extractJSONFromCodeBlock(response: string): string;
34
34
  export declare function preprocessDoubaoBboxJson(input: string): string;
35
- export declare function resolveDeepThinkConfig({ deepThink, modelFamily, }: {
36
- deepThink?: DeepThinkOption;
35
+ export declare function resolveReasoningConfig({ reasoningEnabled, reasoningEffort, reasoningBudget, modelFamily, }: {
36
+ reasoningEnabled?: boolean;
37
+ reasoningEffort?: string;
38
+ reasoningBudget?: number;
37
39
  modelFamily?: TModelFamily;
38
40
  }): {
39
41
  config: Record<string, unknown>;
@@ -55,15 +55,12 @@ export declare const PointSchema: z.ZodObject<{
55
55
  export declare const SizeSchema: z.ZodObject<{
56
56
  width: z.ZodNumber;
57
57
  height: z.ZodNumber;
58
- dpr: z.ZodOptional<z.ZodNumber>;
59
58
  }, "strip", z.ZodTypeAny, {
60
59
  width: number;
61
60
  height: number;
62
- dpr?: number | undefined;
63
61
  }, {
64
62
  width: number;
65
63
  height: number;
66
- dpr?: number | undefined;
67
64
  }>;
68
65
  export declare const RectSchema: z.ZodIntersection<z.ZodIntersection<z.ZodObject<{
69
66
  left: z.ZodNumber;
@@ -77,15 +74,12 @@ export declare const RectSchema: z.ZodIntersection<z.ZodIntersection<z.ZodObject
77
74
  }>, z.ZodObject<{
78
75
  width: z.ZodNumber;
79
76
  height: z.ZodNumber;
80
- dpr: z.ZodOptional<z.ZodNumber>;
81
77
  }, "strip", z.ZodTypeAny, {
82
78
  width: number;
83
79
  height: number;
84
- dpr?: number | undefined;
85
80
  }, {
86
81
  width: number;
87
82
  height: number;
88
- dpr?: number | undefined;
89
83
  }>>, z.ZodObject<{
90
84
  zoom: z.ZodOptional<z.ZodNumber>;
91
85
  }, "strip", z.ZodTypeAny, {
@@ -151,310 +145,6 @@ export declare const TUserPromptSchema: z.ZodUnion<[z.ZodString, z.ZodIntersecti
151
145
  }>>]>;
152
146
  export type TMultimodalPrompt = z.infer<typeof TMultimodalPromptSchema>;
153
147
  export type TUserPrompt = z.infer<typeof TUserPromptSchema>;
154
- declare const MidsceneLocationInput: z.ZodObject<{
155
- prompt: z.ZodUnion<[z.ZodString, z.ZodIntersection<z.ZodObject<{
156
- prompt: z.ZodString;
157
- }, "strip", z.ZodTypeAny, {
158
- prompt: string;
159
- }, {
160
- prompt: string;
161
- }>, z.ZodObject<{
162
- images: z.ZodOptional<z.ZodOptional<z.ZodArray<z.ZodObject<{
163
- name: z.ZodString;
164
- url: z.ZodString;
165
- }, "strip", z.ZodTypeAny, {
166
- name: string;
167
- url: string;
168
- }, {
169
- name: string;
170
- url: string;
171
- }>, "many">>>;
172
- convertHttpImage2Base64: z.ZodOptional<z.ZodOptional<z.ZodBoolean>>;
173
- }, "strip", z.ZodTypeAny, {
174
- images?: {
175
- name: string;
176
- url: string;
177
- }[] | undefined;
178
- convertHttpImage2Base64?: boolean | undefined;
179
- }, {
180
- images?: {
181
- name: string;
182
- url: string;
183
- }[] | undefined;
184
- convertHttpImage2Base64?: boolean | undefined;
185
- }>>]>;
186
- deepThink: z.ZodOptional<z.ZodBoolean>;
187
- cacheable: z.ZodOptional<z.ZodBoolean>;
188
- xpath: z.ZodOptional<z.ZodUnion<[z.ZodString, z.ZodBoolean]>>;
189
- }, "passthrough", z.ZodTypeAny, z.objectOutputType<{
190
- prompt: z.ZodUnion<[z.ZodString, z.ZodIntersection<z.ZodObject<{
191
- prompt: z.ZodString;
192
- }, "strip", z.ZodTypeAny, {
193
- prompt: string;
194
- }, {
195
- prompt: string;
196
- }>, z.ZodObject<{
197
- images: z.ZodOptional<z.ZodOptional<z.ZodArray<z.ZodObject<{
198
- name: z.ZodString;
199
- url: z.ZodString;
200
- }, "strip", z.ZodTypeAny, {
201
- name: string;
202
- url: string;
203
- }, {
204
- name: string;
205
- url: string;
206
- }>, "many">>>;
207
- convertHttpImage2Base64: z.ZodOptional<z.ZodOptional<z.ZodBoolean>>;
208
- }, "strip", z.ZodTypeAny, {
209
- images?: {
210
- name: string;
211
- url: string;
212
- }[] | undefined;
213
- convertHttpImage2Base64?: boolean | undefined;
214
- }, {
215
- images?: {
216
- name: string;
217
- url: string;
218
- }[] | undefined;
219
- convertHttpImage2Base64?: boolean | undefined;
220
- }>>]>;
221
- deepThink: z.ZodOptional<z.ZodBoolean>;
222
- cacheable: z.ZodOptional<z.ZodBoolean>;
223
- xpath: z.ZodOptional<z.ZodUnion<[z.ZodString, z.ZodBoolean]>>;
224
- }, z.ZodTypeAny, "passthrough">, z.objectInputType<{
225
- prompt: z.ZodUnion<[z.ZodString, z.ZodIntersection<z.ZodObject<{
226
- prompt: z.ZodString;
227
- }, "strip", z.ZodTypeAny, {
228
- prompt: string;
229
- }, {
230
- prompt: string;
231
- }>, z.ZodObject<{
232
- images: z.ZodOptional<z.ZodOptional<z.ZodArray<z.ZodObject<{
233
- name: z.ZodString;
234
- url: z.ZodString;
235
- }, "strip", z.ZodTypeAny, {
236
- name: string;
237
- url: string;
238
- }, {
239
- name: string;
240
- url: string;
241
- }>, "many">>>;
242
- convertHttpImage2Base64: z.ZodOptional<z.ZodOptional<z.ZodBoolean>>;
243
- }, "strip", z.ZodTypeAny, {
244
- images?: {
245
- name: string;
246
- url: string;
247
- }[] | undefined;
248
- convertHttpImage2Base64?: boolean | undefined;
249
- }, {
250
- images?: {
251
- name: string;
252
- url: string;
253
- }[] | undefined;
254
- convertHttpImage2Base64?: boolean | undefined;
255
- }>>]>;
256
- deepThink: z.ZodOptional<z.ZodBoolean>;
257
- cacheable: z.ZodOptional<z.ZodBoolean>;
258
- xpath: z.ZodOptional<z.ZodUnion<[z.ZodString, z.ZodBoolean]>>;
259
- }, z.ZodTypeAny, "passthrough">>;
260
- declare const MidsceneLocationResult: z.ZodObject<{
261
- midscene_location_field_flag: z.ZodLiteral<true>;
262
- prompt: z.ZodUnion<[z.ZodString, z.ZodIntersection<z.ZodObject<{
263
- prompt: z.ZodString;
264
- }, "strip", z.ZodTypeAny, {
265
- prompt: string;
266
- }, {
267
- prompt: string;
268
- }>, z.ZodObject<{
269
- images: z.ZodOptional<z.ZodOptional<z.ZodArray<z.ZodObject<{
270
- name: z.ZodString;
271
- url: z.ZodString;
272
- }, "strip", z.ZodTypeAny, {
273
- name: string;
274
- url: string;
275
- }, {
276
- name: string;
277
- url: string;
278
- }>, "many">>>;
279
- convertHttpImage2Base64: z.ZodOptional<z.ZodOptional<z.ZodBoolean>>;
280
- }, "strip", z.ZodTypeAny, {
281
- images?: {
282
- name: string;
283
- url: string;
284
- }[] | undefined;
285
- convertHttpImage2Base64?: boolean | undefined;
286
- }, {
287
- images?: {
288
- name: string;
289
- url: string;
290
- }[] | undefined;
291
- convertHttpImage2Base64?: boolean | undefined;
292
- }>>]>;
293
- deepThink: z.ZodOptional<z.ZodBoolean>;
294
- cacheable: z.ZodOptional<z.ZodBoolean>;
295
- xpath: z.ZodOptional<z.ZodBoolean>;
296
- center: z.ZodTuple<[z.ZodNumber, z.ZodNumber], null>;
297
- rect: z.ZodIntersection<z.ZodIntersection<z.ZodObject<{
298
- left: z.ZodNumber;
299
- top: z.ZodNumber;
300
- }, "strip", z.ZodTypeAny, {
301
- left: number;
302
- top: number;
303
- }, {
304
- left: number;
305
- top: number;
306
- }>, z.ZodObject<{
307
- width: z.ZodNumber;
308
- height: z.ZodNumber;
309
- dpr: z.ZodOptional<z.ZodNumber>;
310
- }, "strip", z.ZodTypeAny, {
311
- width: number;
312
- height: number;
313
- dpr?: number | undefined;
314
- }, {
315
- width: number;
316
- height: number;
317
- dpr?: number | undefined;
318
- }>>, z.ZodObject<{
319
- zoom: z.ZodOptional<z.ZodNumber>;
320
- }, "strip", z.ZodTypeAny, {
321
- zoom?: number | undefined;
322
- }, {
323
- zoom?: number | undefined;
324
- }>>;
325
- }, "passthrough", z.ZodTypeAny, z.objectOutputType<{
326
- midscene_location_field_flag: z.ZodLiteral<true>;
327
- prompt: z.ZodUnion<[z.ZodString, z.ZodIntersection<z.ZodObject<{
328
- prompt: z.ZodString;
329
- }, "strip", z.ZodTypeAny, {
330
- prompt: string;
331
- }, {
332
- prompt: string;
333
- }>, z.ZodObject<{
334
- images: z.ZodOptional<z.ZodOptional<z.ZodArray<z.ZodObject<{
335
- name: z.ZodString;
336
- url: z.ZodString;
337
- }, "strip", z.ZodTypeAny, {
338
- name: string;
339
- url: string;
340
- }, {
341
- name: string;
342
- url: string;
343
- }>, "many">>>;
344
- convertHttpImage2Base64: z.ZodOptional<z.ZodOptional<z.ZodBoolean>>;
345
- }, "strip", z.ZodTypeAny, {
346
- images?: {
347
- name: string;
348
- url: string;
349
- }[] | undefined;
350
- convertHttpImage2Base64?: boolean | undefined;
351
- }, {
352
- images?: {
353
- name: string;
354
- url: string;
355
- }[] | undefined;
356
- convertHttpImage2Base64?: boolean | undefined;
357
- }>>]>;
358
- deepThink: z.ZodOptional<z.ZodBoolean>;
359
- cacheable: z.ZodOptional<z.ZodBoolean>;
360
- xpath: z.ZodOptional<z.ZodBoolean>;
361
- center: z.ZodTuple<[z.ZodNumber, z.ZodNumber], null>;
362
- rect: z.ZodIntersection<z.ZodIntersection<z.ZodObject<{
363
- left: z.ZodNumber;
364
- top: z.ZodNumber;
365
- }, "strip", z.ZodTypeAny, {
366
- left: number;
367
- top: number;
368
- }, {
369
- left: number;
370
- top: number;
371
- }>, z.ZodObject<{
372
- width: z.ZodNumber;
373
- height: z.ZodNumber;
374
- dpr: z.ZodOptional<z.ZodNumber>;
375
- }, "strip", z.ZodTypeAny, {
376
- width: number;
377
- height: number;
378
- dpr?: number | undefined;
379
- }, {
380
- width: number;
381
- height: number;
382
- dpr?: number | undefined;
383
- }>>, z.ZodObject<{
384
- zoom: z.ZodOptional<z.ZodNumber>;
385
- }, "strip", z.ZodTypeAny, {
386
- zoom?: number | undefined;
387
- }, {
388
- zoom?: number | undefined;
389
- }>>;
390
- }, z.ZodTypeAny, "passthrough">, z.objectInputType<{
391
- midscene_location_field_flag: z.ZodLiteral<true>;
392
- prompt: z.ZodUnion<[z.ZodString, z.ZodIntersection<z.ZodObject<{
393
- prompt: z.ZodString;
394
- }, "strip", z.ZodTypeAny, {
395
- prompt: string;
396
- }, {
397
- prompt: string;
398
- }>, z.ZodObject<{
399
- images: z.ZodOptional<z.ZodOptional<z.ZodArray<z.ZodObject<{
400
- name: z.ZodString;
401
- url: z.ZodString;
402
- }, "strip", z.ZodTypeAny, {
403
- name: string;
404
- url: string;
405
- }, {
406
- name: string;
407
- url: string;
408
- }>, "many">>>;
409
- convertHttpImage2Base64: z.ZodOptional<z.ZodOptional<z.ZodBoolean>>;
410
- }, "strip", z.ZodTypeAny, {
411
- images?: {
412
- name: string;
413
- url: string;
414
- }[] | undefined;
415
- convertHttpImage2Base64?: boolean | undefined;
416
- }, {
417
- images?: {
418
- name: string;
419
- url: string;
420
- }[] | undefined;
421
- convertHttpImage2Base64?: boolean | undefined;
422
- }>>]>;
423
- deepThink: z.ZodOptional<z.ZodBoolean>;
424
- cacheable: z.ZodOptional<z.ZodBoolean>;
425
- xpath: z.ZodOptional<z.ZodBoolean>;
426
- center: z.ZodTuple<[z.ZodNumber, z.ZodNumber], null>;
427
- rect: z.ZodIntersection<z.ZodIntersection<z.ZodObject<{
428
- left: z.ZodNumber;
429
- top: z.ZodNumber;
430
- }, "strip", z.ZodTypeAny, {
431
- left: number;
432
- top: number;
433
- }, {
434
- left: number;
435
- top: number;
436
- }>, z.ZodObject<{
437
- width: z.ZodNumber;
438
- height: z.ZodNumber;
439
- dpr: z.ZodOptional<z.ZodNumber>;
440
- }, "strip", z.ZodTypeAny, {
441
- width: number;
442
- height: number;
443
- dpr?: number | undefined;
444
- }, {
445
- width: number;
446
- height: number;
447
- dpr?: number | undefined;
448
- }>>, z.ZodObject<{
449
- zoom: z.ZodOptional<z.ZodNumber>;
450
- }, "strip", z.ZodTypeAny, {
451
- zoom?: number | undefined;
452
- }, {
453
- zoom?: number | undefined;
454
- }>>;
455
- }, z.ZodTypeAny, "passthrough">>;
456
- export type MidsceneLocationResultType = z.infer<typeof MidsceneLocationResult>;
457
- export type MidsceneLocationInputType = z.infer<typeof MidsceneLocationInput>;
458
148
  /**
459
149
  * Returns the schema for locator fields.
460
150
  * This now returns the input schema which is more permissive and suitable for validation.
@@ -2,7 +2,7 @@ import type { ActionScrollParam, DeviceAction, LocateResultElement } from '../ty
2
2
  import type { IModelConfig } from '@midscene/shared/env';
3
3
  import type { ElementNode } from '@midscene/shared/extractor';
4
4
  import { z } from 'zod';
5
- import type { ElementCacheFeature, Rect, Size, UIContext } from '../types';
5
+ import type { ElementCacheFeature, Rect, Size } from '../types';
6
6
  export interface FileChooserHandler {
7
7
  accept(files: string[]): Promise<void>;
8
8
  }
@@ -27,7 +27,6 @@ export declare abstract class AbstractInterface {
27
27
  abstract getElementsNodeTree?: () => Promise<ElementNode>;
28
28
  abstract url?: () => string | Promise<string>;
29
29
  abstract evaluateJavaScript?<T = any>(script: string): Promise<T>;
30
- abstract getContext?(): Promise<UIContext>;
31
30
  /**
32
31
  * Get the current time from the device.
33
32
  * Returns the device's current timestamp in milliseconds.
@@ -47,5 +47,16 @@ export declare function parseImageScripts(html: string): Record<string, string>;
47
47
  export declare function parseDumpScript(html: string): string;
48
48
  export declare function parseDumpScriptAttributes(html: string): Record<string, string>;
49
49
  export declare function generateImageScriptTag(id: string, data: string): string;
50
+ /**
51
+ * Inline script that fixes relative URL resolution for directory-mode reports.
52
+ *
53
+ * Problem: when a static server (e.g. `npx serve`) serves `name/index.html`
54
+ * at URL `/name` (without trailing slash), relative paths like
55
+ * `./screenshots/xxx.png` resolve to `/screenshots/xxx.png` instead of
56
+ * `/name/screenshots/xxx.png`.
57
+ *
58
+ * Fix: dynamically insert a <base> tag so relative URLs resolve correctly.
59
+ */
60
+ export declare const BASE_URL_FIX_SCRIPT: string;
50
61
  export declare function generateDumpScriptTag(json: string, attributes?: Record<string, string>): string;
51
62
  export {};
@@ -2,7 +2,7 @@ import { z } from 'zod';
2
2
  import Service from './service/index';
3
3
  import { TaskRunner } from './task-runner';
4
4
  import { getVersion } from './utils';
5
- export { plan, AiLocateElement, getMidsceneLocationSchema, type MidsceneLocationResultType, PointSchema, SizeSchema, RectSchema, TMultimodalPromptSchema, TUserPromptSchema, type TMultimodalPrompt, type TUserPrompt, } from './ai-model/index';
5
+ export { plan, AiLocateElement, getMidsceneLocationSchema, PointSchema, SizeSchema, RectSchema, TMultimodalPromptSchema, TUserPromptSchema, type TMultimodalPrompt, type TUserPrompt, } from './ai-model/index';
6
6
  export { MIDSCENE_MODEL_NAME, type CreateOpenAIClientFn, } from '@midscene/shared/env';
7
7
  export type * from './types';
8
8
  export { ServiceError, ExecutionDump, GroupedActionDump, type IExecutionDump, type IGroupedActionDump, } from './types';
@@ -3,6 +3,11 @@ export declare class ReportMergingTool {
3
3
  private reportInfos;
4
4
  append(reportInfo: ReportFileWithAttributes): void;
5
5
  clear(): void;
6
+ /**
7
+ * Check if a report is in directory mode (html-and-external-assets).
8
+ * Directory mode reports: {name}/index.html + {name}/screenshots/
9
+ */
10
+ private isDirectoryModeReport;
6
11
  mergeReports(reportFileName?: 'AUTO' | string, opts?: {
7
12
  rmOriginalReports?: boolean;
8
13
  overwrite?: boolean;
@@ -70,8 +70,9 @@ export interface AgentDescribeElementAtPointResult {
70
70
  */
71
71
  export declare abstract class UIContext {
72
72
  abstract screenshot: ScreenshotItem;
73
- abstract size: Size;
73
+ abstract shotSize: Size;
74
74
  abstract _isFrozen?: boolean;
75
+ abstract deprecatedDpr?: number;
75
76
  }
76
77
  export type EnsureObject<T> = {
77
78
  [K in keyof T]: any;
@@ -492,7 +493,6 @@ export interface WebElementInfo extends BaseElement {
492
493
  [key: string]: string;
493
494
  };
494
495
  }
495
- export type WebUIContext = UIContext;
496
496
  /**
497
497
  * Agent
498
498
  */
package/package.json CHANGED
@@ -1,7 +1,7 @@
1
1
  {
2
2
  "name": "@midscene/core",
3
3
  "description": "Automate browser actions, extract data, and perform assertions using AI. It offers JavaScript SDK, Chrome extension, and support for scripting in YAML. See https://midscenejs.com/ for details.",
4
- "version": "1.4.6",
4
+ "version": "1.4.7",
5
5
  "repository": "https://github.com/web-infra-dev/midscene",
6
6
  "homepage": "https://midscenejs.com/",
7
7
  "main": "./dist/lib/index.js",
@@ -89,7 +89,7 @@
89
89
  "semver": "7.5.2",
90
90
  "undici": "^6.0.0",
91
91
  "zod": "3.24.3",
92
- "@midscene/shared": "1.4.6"
92
+ "@midscene/shared": "1.4.7"
93
93
  },
94
94
  "devDependencies": {
95
95
  "@rslib/core": "^0.18.3",