@midscene/core 0.26.7-beta-20250818034910.0 → 0.26.7-beta-20250818035341.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (39):
  1. package/dist/es/ai-model/common.mjs +37 -16
  2. package/dist/es/ai-model/common.mjs.map +1 -1
  3. package/dist/es/ai-model/index.mjs +3 -3
  4. package/dist/es/ai-model/inspect.mjs +51 -2
  5. package/dist/es/ai-model/inspect.mjs.map +1 -1
  6. package/dist/es/ai-model/llm-planning.mjs +1 -1
  7. package/dist/es/ai-model/llm-planning.mjs.map +1 -1
  8. package/dist/es/ai-model/prompt/assertion.mjs +25 -1
  9. package/dist/es/ai-model/prompt/assertion.mjs.map +1 -1
  10. package/dist/es/index.mjs +2 -2
  11. package/dist/es/index.mjs.map +1 -1
  12. package/dist/es/insight/index.mjs +36 -1
  13. package/dist/es/insight/index.mjs.map +1 -1
  14. package/dist/es/types.mjs.map +1 -1
  15. package/dist/es/utils.mjs +2 -2
  16. package/dist/lib/ai-model/common.js +36 -18
  17. package/dist/lib/ai-model/common.js.map +1 -1
  18. package/dist/lib/ai-model/index.js +3 -3
  19. package/dist/lib/ai-model/inspect.js +54 -2
  20. package/dist/lib/ai-model/inspect.js.map +1 -1
  21. package/dist/lib/ai-model/llm-planning.js +1 -1
  22. package/dist/lib/ai-model/llm-planning.js.map +1 -1
  23. package/dist/lib/ai-model/prompt/assertion.js +29 -2
  24. package/dist/lib/ai-model/prompt/assertion.js.map +1 -1
  25. package/dist/lib/index.js +3 -0
  26. package/dist/lib/index.js.map +1 -1
  27. package/dist/lib/insight/index.js +35 -0
  28. package/dist/lib/insight/index.js.map +1 -1
  29. package/dist/lib/types.js.map +1 -1
  30. package/dist/lib/utils.js +2 -2
  31. package/dist/types/ai-model/common.d.ts +2 -3
  32. package/dist/types/ai-model/index.d.ts +1 -2
  33. package/dist/types/ai-model/inspect.d.ts +8 -1
  34. package/dist/types/ai-model/prompt/assertion.d.ts +3 -0
  35. package/dist/types/index.d.ts +1 -1
  36. package/dist/types/insight/index.d.ts +2 -1
  37. package/dist/types/types.d.ts +0 -1
  38. package/dist/types/yaml.d.ts +6 -6
  39. package/package.json +3 -3
@@ -1,4 +1,4 @@
1
- import type { AIUsageInfo, BaseElement, DeviceAction, ElementTreeNode, MidsceneYamlFlowItem, PlanningAction, Rect, Size } from '../types';
1
+ import type { AIUsageInfo, BaseElement, ElementTreeNode, MidsceneYamlFlowItem, PlanningAction, Rect, Size } from '../types';
2
2
  import type { ChatCompletionSystemMessageParam, ChatCompletionUserMessageParam } from 'openai/resources/index';
3
3
  import type { PlanningLocateParam } from '../types';
4
4
  export type AIArgs = [
@@ -12,7 +12,6 @@ export declare enum AIActionType {
12
12
  PLAN = 3,
13
13
  DESCRIBE_ELEMENT = 4
14
14
  }
15
- export declare const actionSpaceTypePrefix = "action_space_";
16
15
  export declare function callAiFn<T>(msgs: AIArgs, AIActionTypeValue: AIActionType): Promise<{
17
16
  content: T;
18
17
  usage?: AIUsageInfo;
@@ -32,4 +31,4 @@ export declare function mergeRects(rects: Rect[]): {
32
31
  };
33
32
  export declare function expandSearchArea(rect: Rect, screenSize: Size): Rect;
34
33
  export declare function markupImageForLLM(screenshotBase64: string, tree: ElementTreeNode<BaseElement>, size: Size): Promise<string>;
35
- export declare function buildYamlFlowFromPlans(plans: PlanningAction[], actionSpace: DeviceAction[], sleep?: number): MidsceneYamlFlowItem[];
34
+ export declare function buildYamlFlowFromPlans(plans: PlanningAction[], sleep?: number): MidsceneYamlFlowItem[];
@@ -4,9 +4,8 @@ export { describeUserPage, elementByPositionWithElementInfo, } from './prompt/ut
4
4
  export { generatePlaywrightTest, generatePlaywrightTestStream, } from './prompt/playwright-generator';
5
5
  export { generateYamlTest, generateYamlTestStream, } from './prompt/yaml-generator';
6
6
  export type { ChatCompletionMessageParam } from 'openai/resources/index';
7
- export { AiLocateElement, AiExtractElementInfo, AiLocateSection, } from './inspect';
7
+ export { AiLocateElement, AiExtractElementInfo, AiAssert, AiLocateSection, } from './inspect';
8
8
  export { plan } from './llm-planning';
9
9
  export { callAiFn, adaptBboxToRect, } from './common';
10
10
  export { vlmPlanning, resizeImageForUiTars } from './ui-tars-planning';
11
11
  export { AIActionType, type AIArgs } from './common';
12
- export { actionSpaceTypePrefix } from './common';
@@ -1,4 +1,4 @@
1
- import type { AIDataExtractionResponse, AIElementLocatorResponse, AIElementResponse, AISectionLocatorResponse, AIUsageInfo, BaseElement, ElementById, InsightExtractOption, Rect, ReferenceImage, TMultimodalPrompt, TUserPrompt, UIContext } from '../types';
1
+ import type { AIAssertionResponse, AIDataExtractionResponse, AIElementLocatorResponse, AIElementResponse, AISectionLocatorResponse, AIUsageInfo, BaseElement, ElementById, InsightExtractOption, Rect, ReferenceImage, TMultimodalPrompt, TUserPrompt, UIContext } from '../types';
2
2
  import type { ChatCompletionSystemMessageParam, ChatCompletionUserMessageParam } from 'openai/resources/index';
3
3
  import { callAiFn } from './common';
4
4
  export type AIArgs = [
@@ -40,3 +40,10 @@ export declare function AiExtractElementInfo<T, ElementType extends BaseElement
40
40
  elementById: (idOrIndexId: string) => ElementType;
41
41
  usage: AIUsageInfo | undefined;
42
42
  }>;
43
+ export declare function AiAssert<ElementType extends BaseElement = BaseElement>(options: {
44
+ assertion: TUserPrompt;
45
+ context: UIContext<ElementType>;
46
+ }): Promise<{
47
+ content: AIAssertionResponse;
48
+ usage: AIUsageInfo | undefined;
49
+ }>;
@@ -1,2 +1,5 @@
1
1
  import type { ResponseFormatJSONSchema } from 'openai/resources/index';
2
+ export declare function systemPromptToAssert(model: {
3
+ isUITars: boolean;
4
+ }): string;
2
5
  export declare const assertSchema: ResponseFormatJSONSchema;
@@ -1,7 +1,7 @@
1
1
  import { Executor } from './ai-model/action-executor';
2
2
  import Insight from './insight/index';
3
3
  import { getVersion } from './utils';
4
- export { plan, describeUserPage, AiLocateElement, } from './ai-model/index';
4
+ export { plan, describeUserPage, AiLocateElement, AiAssert, } from './ai-model/index';
5
5
  export { getAIConfig, MIDSCENE_MODEL_NAME } from '@midscene/shared/env';
6
6
  export type * from './types';
7
7
  export default Insight;
@@ -1,5 +1,5 @@
1
1
  import { callAiFn } from '../ai-model/common';
2
- import type { AIDescribeElementResponse, AIElementResponse, AIUsageInfo, BaseElement, DetailedLocateParam, DumpSubscriber, InsightAction, InsightExtractOption, InsightExtractParam, InsightOptions, InsightTaskInfo, LocateResult, Rect, TMultimodalPrompt, UIContext } from '../types';
2
+ import type { AIDescribeElementResponse, AIElementResponse, AIUsageInfo, BaseElement, DetailedLocateParam, DumpSubscriber, InsightAction, InsightAssertionResponse, InsightExtractOption, InsightExtractParam, InsightOptions, InsightTaskInfo, LocateResult, Rect, TMultimodalPrompt, TUserPrompt, UIContext } from '../types';
3
3
  export interface LocateOpts {
4
4
  context?: UIContext<BaseElement>;
5
5
  callAI?: typeof callAiFn<AIElementResponse>;
@@ -19,6 +19,7 @@ export default class Insight<ElementType extends BaseElement = BaseElement, Cont
19
19
  thought?: string;
20
20
  usage?: AIUsageInfo;
21
21
  }>;
22
+ assert(assertion: TUserPrompt): Promise<InsightAssertionResponse>;
22
23
  describe(target: Rect | [number, number], opt?: {
23
24
  deepThink?: boolean;
24
25
  }): Promise<Pick<AIDescribeElementResponse, 'description'>>;
@@ -390,7 +390,6 @@ export type TUserPrompt = string | ({
390
390
  } & Partial<TMultimodalPrompt>);
391
391
  export interface DeviceAction<ParamType = any> {
392
392
  name: string;
393
- interfaceAlias?: string;
394
393
  description?: string;
395
394
  paramSchema?: string;
396
395
  paramDescription?: string;
@@ -10,7 +10,6 @@ export interface InsightExtractOption {
10
10
  domIncluded?: boolean | 'visible-only';
11
11
  screenshotIncluded?: boolean;
12
12
  returnThought?: boolean;
13
- isWaitForAssert?: boolean;
14
13
  }
15
14
  export interface ReferenceImage {
16
15
  base64: string;
@@ -111,15 +110,16 @@ export interface MidsceneYamlFlowItemAIHover extends LocateOption {
111
110
  aiHover: TUserPrompt;
112
111
  }
113
112
  export interface MidsceneYamlFlowItemAIInput extends LocateOption {
114
- aiInput: TUserPrompt | undefined;
115
- value: string;
113
+ aiInput: string;
114
+ locate: TUserPrompt;
116
115
  }
117
116
  export interface MidsceneYamlFlowItemAIKeyboardPress extends LocateOption {
118
- aiKeyboardPress: TUserPrompt | undefined;
119
- key: string;
117
+ aiKeyboardPress: string;
118
+ locate?: TUserPrompt;
120
119
  }
121
120
  export interface MidsceneYamlFlowItemAIScroll extends LocateOption, ScrollParam {
122
- aiScroll: TUserPrompt | undefined;
121
+ aiScroll: null;
122
+ locate?: TUserPrompt;
123
123
  }
124
124
  export interface MidsceneYamlFlowItemEvaluateJavaScript {
125
125
  javascript: string;
package/package.json CHANGED
@@ -1,7 +1,7 @@
1
1
  {
2
2
  "name": "@midscene/core",
3
3
  "description": "Automate browser actions, extract data, and perform assertions using AI. It offers JavaScript SDK, Chrome extension, and support for scripting in YAML. See https://midscenejs.com/ for details.",
4
- "version": "0.26.7-beta-20250818034910.0",
4
+ "version": "0.26.7-beta-20250818035341.0",
5
5
  "repository": "https://github.com/web-infra-dev/midscene",
6
6
  "homepage": "https://midscenejs.com/",
7
7
  "main": "./dist/lib/index.js",
@@ -60,8 +60,8 @@
60
60
  "langsmith": "0.3.7",
61
61
  "openai": "4.81.0",
62
62
  "socks-proxy-agent": "8.0.4",
63
- "@midscene/shared": "0.26.7-beta-20250818034910.0",
64
- "@midscene/recorder": "0.26.7-beta-20250818034910.0"
63
+ "@midscene/shared": "0.26.7-beta-20250818035341.0",
64
+ "@midscene/recorder": "0.26.7-beta-20250818035341.0"
65
65
  },
66
66
  "devDependencies": {
67
67
  "@microsoft/api-extractor": "^7.52.10",