@godscene/core 1.7.11

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (189) hide show
  1. package/LICENSE +21 -0
  2. package/README.md +9 -0
  3. package/dist/es/agent/agent.mjs +767 -0
  4. package/dist/es/agent/common.mjs +0 -0
  5. package/dist/es/agent/execution-session.mjs +39 -0
  6. package/dist/es/agent/index.mjs +6 -0
  7. package/dist/es/agent/task-builder.mjs +343 -0
  8. package/dist/es/agent/task-cache.mjs +212 -0
  9. package/dist/es/agent/tasks.mjs +428 -0
  10. package/dist/es/agent/ui-utils.mjs +101 -0
  11. package/dist/es/agent/utils.mjs +167 -0
  12. package/dist/es/ai-model/auto-glm/actions.mjs +237 -0
  13. package/dist/es/ai-model/auto-glm/index.mjs +6 -0
  14. package/dist/es/ai-model/auto-glm/parser.mjs +237 -0
  15. package/dist/es/ai-model/auto-glm/planning.mjs +69 -0
  16. package/dist/es/ai-model/auto-glm/prompt.mjs +220 -0
  17. package/dist/es/ai-model/auto-glm/util.mjs +7 -0
  18. package/dist/es/ai-model/connectivity.mjs +136 -0
  19. package/dist/es/ai-model/conversation-history.mjs +193 -0
  20. package/dist/es/ai-model/index.mjs +12 -0
  21. package/dist/es/ai-model/inspect.mjs +395 -0
  22. package/dist/es/ai-model/llm-planning.mjs +231 -0
  23. package/dist/es/ai-model/prompt/common.mjs +5 -0
  24. package/dist/es/ai-model/prompt/describe.mjs +64 -0
  25. package/dist/es/ai-model/prompt/extraction.mjs +129 -0
  26. package/dist/es/ai-model/prompt/llm-locator.mjs +49 -0
  27. package/dist/es/ai-model/prompt/llm-planning.mjs +584 -0
  28. package/dist/es/ai-model/prompt/llm-section-locator.mjs +42 -0
  29. package/dist/es/ai-model/prompt/order-sensitive-judge.mjs +33 -0
  30. package/dist/es/ai-model/prompt/playwright-generator.mjs +115 -0
  31. package/dist/es/ai-model/prompt/ui-tars-planning.mjs +34 -0
  32. package/dist/es/ai-model/prompt/util.mjs +57 -0
  33. package/dist/es/ai-model/prompt/yaml-generator.mjs +201 -0
  34. package/dist/es/ai-model/service-caller/codex-app-server.mjs +573 -0
  35. package/dist/es/ai-model/service-caller/image-detail.mjs +4 -0
  36. package/dist/es/ai-model/service-caller/index.mjs +648 -0
  37. package/dist/es/ai-model/service-caller/request-timeout.mjs +47 -0
  38. package/dist/es/ai-model/ui-tars-planning.mjs +247 -0
  39. package/dist/es/common.mjs +382 -0
  40. package/dist/es/device/device-options.mjs +0 -0
  41. package/dist/es/device/index.mjs +340 -0
  42. package/dist/es/dump/html-utils.mjs +290 -0
  43. package/dist/es/dump/index.mjs +3 -0
  44. package/dist/es/dump/screenshot-restoration.mjs +30 -0
  45. package/dist/es/dump/screenshot-store.mjs +125 -0
  46. package/dist/es/index.mjs +17 -0
  47. package/dist/es/report-cli.mjs +149 -0
  48. package/dist/es/report-generator.mjs +203 -0
  49. package/dist/es/report-markdown.mjs +216 -0
  50. package/dist/es/report.mjs +287 -0
  51. package/dist/es/screenshot-item.mjs +120 -0
  52. package/dist/es/service/index.mjs +272 -0
  53. package/dist/es/service/utils.mjs +13 -0
  54. package/dist/es/skill/index.mjs +35 -0
  55. package/dist/es/task-runner.mjs +261 -0
  56. package/dist/es/task-timing.mjs +10 -0
  57. package/dist/es/tree.mjs +11 -0
  58. package/dist/es/types.mjs +202 -0
  59. package/dist/es/utils.mjs +232 -0
  60. package/dist/es/yaml/builder.mjs +11 -0
  61. package/dist/es/yaml/index.mjs +4 -0
  62. package/dist/es/yaml/player.mjs +425 -0
  63. package/dist/es/yaml/utils.mjs +100 -0
  64. package/dist/es/yaml.mjs +0 -0
  65. package/dist/lib/agent/agent.js +815 -0
  66. package/dist/lib/agent/common.js +5 -0
  67. package/dist/lib/agent/execution-session.js +73 -0
  68. package/dist/lib/agent/index.js +76 -0
  69. package/dist/lib/agent/task-builder.js +380 -0
  70. package/dist/lib/agent/task-cache.js +264 -0
  71. package/dist/lib/agent/tasks.js +471 -0
  72. package/dist/lib/agent/ui-utils.js +153 -0
  73. package/dist/lib/agent/utils.js +238 -0
  74. package/dist/lib/ai-model/auto-glm/actions.js +271 -0
  75. package/dist/lib/ai-model/auto-glm/index.js +64 -0
  76. package/dist/lib/ai-model/auto-glm/parser.js +280 -0
  77. package/dist/lib/ai-model/auto-glm/planning.js +103 -0
  78. package/dist/lib/ai-model/auto-glm/prompt.js +257 -0
  79. package/dist/lib/ai-model/auto-glm/util.js +44 -0
  80. package/dist/lib/ai-model/connectivity.js +180 -0
  81. package/dist/lib/ai-model/conversation-history.js +227 -0
  82. package/dist/lib/ai-model/index.js +127 -0
  83. package/dist/lib/ai-model/inspect.js +441 -0
  84. package/dist/lib/ai-model/llm-planning.js +268 -0
  85. package/dist/lib/ai-model/prompt/common.js +39 -0
  86. package/dist/lib/ai-model/prompt/describe.js +98 -0
  87. package/dist/lib/ai-model/prompt/extraction.js +169 -0
  88. package/dist/lib/ai-model/prompt/llm-locator.js +86 -0
  89. package/dist/lib/ai-model/prompt/llm-planning.js +621 -0
  90. package/dist/lib/ai-model/prompt/llm-section-locator.js +79 -0
  91. package/dist/lib/ai-model/prompt/order-sensitive-judge.js +70 -0
  92. package/dist/lib/ai-model/prompt/playwright-generator.js +176 -0
  93. package/dist/lib/ai-model/prompt/ui-tars-planning.js +71 -0
  94. package/dist/lib/ai-model/prompt/util.js +103 -0
  95. package/dist/lib/ai-model/prompt/yaml-generator.js +262 -0
  96. package/dist/lib/ai-model/service-caller/codex-app-server.js +622 -0
  97. package/dist/lib/ai-model/service-caller/image-detail.js +38 -0
  98. package/dist/lib/ai-model/service-caller/index.js +716 -0
  99. package/dist/lib/ai-model/service-caller/request-timeout.js +93 -0
  100. package/dist/lib/ai-model/ui-tars-planning.js +281 -0
  101. package/dist/lib/common.js +491 -0
  102. package/dist/lib/device/device-options.js +18 -0
  103. package/dist/lib/device/index.js +467 -0
  104. package/dist/lib/dump/html-utils.js +366 -0
  105. package/dist/lib/dump/index.js +58 -0
  106. package/dist/lib/dump/screenshot-restoration.js +64 -0
  107. package/dist/lib/dump/screenshot-store.js +165 -0
  108. package/dist/lib/index.js +184 -0
  109. package/dist/lib/report-cli.js +189 -0
  110. package/dist/lib/report-generator.js +244 -0
  111. package/dist/lib/report-markdown.js +253 -0
  112. package/dist/lib/report.js +333 -0
  113. package/dist/lib/screenshot-item.js +154 -0
  114. package/dist/lib/service/index.js +306 -0
  115. package/dist/lib/service/utils.js +47 -0
  116. package/dist/lib/skill/index.js +69 -0
  117. package/dist/lib/task-runner.js +298 -0
  118. package/dist/lib/task-timing.js +44 -0
  119. package/dist/lib/tree.js +51 -0
  120. package/dist/lib/types.js +298 -0
  121. package/dist/lib/utils.js +314 -0
  122. package/dist/lib/yaml/builder.js +55 -0
  123. package/dist/lib/yaml/index.js +79 -0
  124. package/dist/lib/yaml/player.js +459 -0
  125. package/dist/lib/yaml/utils.js +153 -0
  126. package/dist/lib/yaml.js +18 -0
  127. package/dist/types/agent/agent.d.ts +220 -0
  128. package/dist/types/agent/common.d.ts +0 -0
  129. package/dist/types/agent/execution-session.d.ts +36 -0
  130. package/dist/types/agent/index.d.ts +9 -0
  131. package/dist/types/agent/task-builder.d.ts +34 -0
  132. package/dist/types/agent/task-cache.d.ts +49 -0
  133. package/dist/types/agent/tasks.d.ts +70 -0
  134. package/dist/types/agent/ui-utils.d.ts +14 -0
  135. package/dist/types/agent/utils.d.ts +25 -0
  136. package/dist/types/ai-model/auto-glm/actions.d.ts +78 -0
  137. package/dist/types/ai-model/auto-glm/index.d.ts +6 -0
  138. package/dist/types/ai-model/auto-glm/parser.d.ts +18 -0
  139. package/dist/types/ai-model/auto-glm/planning.d.ts +12 -0
  140. package/dist/types/ai-model/auto-glm/prompt.d.ts +27 -0
  141. package/dist/types/ai-model/auto-glm/util.d.ts +13 -0
  142. package/dist/types/ai-model/connectivity.d.ts +20 -0
  143. package/dist/types/ai-model/conversation-history.d.ts +105 -0
  144. package/dist/types/ai-model/index.d.ts +16 -0
  145. package/dist/types/ai-model/inspect.d.ts +67 -0
  146. package/dist/types/ai-model/llm-planning.d.ts +19 -0
  147. package/dist/types/ai-model/prompt/common.d.ts +2 -0
  148. package/dist/types/ai-model/prompt/describe.d.ts +1 -0
  149. package/dist/types/ai-model/prompt/extraction.d.ts +7 -0
  150. package/dist/types/ai-model/prompt/llm-locator.d.ts +3 -0
  151. package/dist/types/ai-model/prompt/llm-planning.d.ts +10 -0
  152. package/dist/types/ai-model/prompt/llm-section-locator.d.ts +3 -0
  153. package/dist/types/ai-model/prompt/order-sensitive-judge.d.ts +2 -0
  154. package/dist/types/ai-model/prompt/playwright-generator.d.ts +26 -0
  155. package/dist/types/ai-model/prompt/ui-tars-planning.d.ts +2 -0
  156. package/dist/types/ai-model/prompt/util.d.ts +33 -0
  157. package/dist/types/ai-model/prompt/yaml-generator.d.ts +102 -0
  158. package/dist/types/ai-model/service-caller/codex-app-server.d.ts +42 -0
  159. package/dist/types/ai-model/service-caller/image-detail.d.ts +2 -0
  160. package/dist/types/ai-model/service-caller/index.d.ts +60 -0
  161. package/dist/types/ai-model/service-caller/request-timeout.d.ts +32 -0
  162. package/dist/types/ai-model/ui-tars-planning.d.ts +72 -0
  163. package/dist/types/common.d.ts +288 -0
  164. package/dist/types/device/device-options.d.ts +155 -0
  165. package/dist/types/device/index.d.ts +2565 -0
  166. package/dist/types/dump/html-utils.d.ts +75 -0
  167. package/dist/types/dump/index.d.ts +5 -0
  168. package/dist/types/dump/screenshot-restoration.d.ts +8 -0
  169. package/dist/types/dump/screenshot-store.d.ts +49 -0
  170. package/dist/types/index.d.ts +21 -0
  171. package/dist/types/report-cli.d.ts +36 -0
  172. package/dist/types/report-generator.d.ts +88 -0
  173. package/dist/types/report-markdown.d.ts +24 -0
  174. package/dist/types/report.d.ts +52 -0
  175. package/dist/types/screenshot-item.d.ts +67 -0
  176. package/dist/types/service/index.d.ts +24 -0
  177. package/dist/types/service/utils.d.ts +2 -0
  178. package/dist/types/skill/index.d.ts +25 -0
  179. package/dist/types/task-runner.d.ts +50 -0
  180. package/dist/types/task-timing.d.ts +8 -0
  181. package/dist/types/tree.d.ts +4 -0
  182. package/dist/types/types.d.ts +684 -0
  183. package/dist/types/utils.d.ts +45 -0
  184. package/dist/types/yaml/builder.d.ts +2 -0
  185. package/dist/types/yaml/index.d.ts +4 -0
  186. package/dist/types/yaml/player.d.ts +34 -0
  187. package/dist/types/yaml/utils.d.ts +9 -0
  188. package/dist/types/yaml.d.ts +215 -0
  189. package/package.json +130 -0
@@ -0,0 +1,220 @@
1
+ import type { TUserPrompt } from '../ai-model/index';
2
+ import Service from '../service/index';
3
+ import { type ActionParam, type ActionReturn, type AgentAssertOpt, type AgentDescribeElementAtPointResult, type AgentOpt, type AgentWaitForOpt, type DeepThinkOption, type DeviceAction, ExecutionDump, type LocateOption, type LocateResultElement, type LocateValidatorResult, type LocatorValidatorOption, type OnTaskStartTip, ReportActionDump, type ScrollParam, type ServiceAction, type ServiceExtractOption, type ServiceExtractParam, type UIContext } from '../types';
4
+ import type { AbstractInterface } from '../device';
5
+ import type { TaskRunner } from '../task-runner';
6
+ import { ModelConfigManager } from '@godscene/shared/env';
7
+ import { TaskCache } from './task-cache';
8
+ import { TaskExecutor } from './tasks';
9
+ export type AiActOptions = {
10
+ cacheable?: boolean;
11
+ fileChooserAccept?: string | string[];
12
+ deepThink?: DeepThinkOption;
13
+ deepLocate?: boolean;
14
+ abortSignal?: AbortSignal;
15
+ };
16
+ export declare class Agent<InterfaceType extends AbstractInterface = AbstractInterface> {
17
+ interface: InterfaceType;
18
+ service: Service;
19
+ dump: ReportActionDump;
20
+ reportFile?: string | null;
21
+ reportFileName?: string;
22
+ taskExecutor: TaskExecutor;
23
+ opts: AgentOpt;
24
+ /**
25
+ * If true, the agent will not perform any actions
26
+ */
27
+ dryMode: boolean;
28
+ onTaskStartTip?: OnTaskStartTip;
29
+ taskCache?: TaskCache;
30
+ private dumpUpdateListeners;
31
+ get onDumpUpdate(): ((dump: string, executionDump?: ExecutionDump) => void) | undefined;
32
+ set onDumpUpdate(callback: ((dump: string, executionDump?: ExecutionDump) => void) | undefined);
33
+ destroyed: boolean;
34
+ modelConfigManager: ModelConfigManager;
35
+ /**
36
+ * Frozen page context for consistent AI operations
37
+ */
38
+ private frozenUIContext?;
39
+ private get aiActContext();
40
+ /**
41
+ * Flag to track if VL model warning has been shown
42
+ */
43
+ private hasWarnedNonVLModel;
44
+ private executionDumpIndexByRunner;
45
+ private fullActionSpace;
46
+ private reportGenerator;
47
+ get page(): InterfaceType;
48
+ /**
49
+ * Ensures VL model warning is shown once when needed
50
+ */
51
+ private ensureVLModelWarning;
52
+ private resolveReplanningCycleLimit;
53
+ constructor(interfaceInstance: InterfaceType, opts?: AgentOpt);
54
+ getActionSpace(): Promise<DeviceAction[]>;
55
+ private static readonly CONTEXT_RETRY_MAX;
56
+ private static readonly CONTEXT_RETRY_DELAY_MS;
57
+ /**
58
+ * Override in subclasses to indicate which errors are transient and should
59
+ * trigger an automatic retry when building the UI context.
60
+ * Returns `false` by default (no retry).
61
+ */
62
+ protected isRetryableContextError(_error: unknown): boolean;
63
+ getUIContext(action?: ServiceAction): Promise<UIContext>;
64
+ _snapshotContext(): Promise<UIContext>;
65
+ /**
66
+ * @deprecated Use {@link setAIActContext} instead.
67
+ */
68
+ setAIActionContext(prompt: string): Promise<void>;
69
+ setAIActContext(prompt: string): Promise<void>;
70
+ resetDump(): ReportActionDump;
71
+ appendExecutionDump(execution: ExecutionDump, runner?: TaskRunner): void;
72
+ dumpDataString(opt?: {
73
+ inlineScreenshots?: boolean;
74
+ }): string;
75
+ reportHTMLString(opt?: {
76
+ inlineScreenshots?: boolean;
77
+ }): string;
78
+ private lastExecutionDump?;
79
+ writeOutActionDumps(executionDump?: ExecutionDump): void;
80
+ private getReportMeta;
81
+ private callbackOnTaskStartTip;
82
+ wrapActionInActionSpace<T extends DeviceAction>(name: string): (param: ActionParam<T>) => Promise<ActionReturn<T>>;
83
+ callActionInActionSpace<T = any>(type: string, opt?: T): Promise<any>;
84
+ aiTap(locatePrompt: TUserPrompt, opt?: LocateOption & {
85
+ fileChooserAccept?: string | string[];
86
+ }): Promise<any>;
87
+ aiRightClick(locatePrompt: TUserPrompt, opt?: LocateOption): Promise<any>;
88
+ aiDoubleClick(locatePrompt: TUserPrompt, opt?: LocateOption): Promise<any>;
89
+ aiHover(locatePrompt: TUserPrompt, opt?: LocateOption): Promise<any>;
90
+ aiInput(locatePrompt: TUserPrompt, opt: LocateOption & {
91
+ value: string | number;
92
+ } & {
93
+ autoDismissKeyboard?: boolean;
94
+ } & {
95
+ mode?: 'replace' | 'clear' | 'typeOnly' | 'append';
96
+ }): Promise<any>;
97
+ /**
98
+ * @deprecated Use aiInput(locatePrompt, opt) instead where opt contains the value
99
+ */
100
+ aiInput(value: string | number, locatePrompt: TUserPrompt, opt?: LocateOption & {
101
+ autoDismissKeyboard?: boolean;
102
+ } & {
103
+ mode?: 'replace' | 'clear' | 'typeOnly' | 'append';
104
+ }): Promise<any>;
105
+ aiKeyboardPress(locatePrompt: TUserPrompt, opt: LocateOption & {
106
+ keyName: string;
107
+ }): Promise<any>;
108
+ /**
109
+ * @deprecated Use aiKeyboardPress(locatePrompt, opt) instead where opt contains the keyName
110
+ */
111
+ aiKeyboardPress(keyName: string, locatePrompt?: TUserPrompt, opt?: LocateOption): Promise<any>;
112
+ aiScroll(locatePrompt: TUserPrompt | undefined, opt: LocateOption & ScrollParam): Promise<any>;
113
+ /**
114
+ * @deprecated Use aiScroll(locatePrompt, opt) instead where opt contains the scroll parameters
115
+ */
116
+ aiScroll(scrollParam: ScrollParam, locatePrompt?: TUserPrompt, opt?: LocateOption): Promise<any>;
117
+ aiPinch(locatePrompt: TUserPrompt | undefined, opt: LocateOption & {
118
+ direction: 'in' | 'out';
119
+ distance?: number;
120
+ duration?: number;
121
+ }): Promise<any>;
122
+ aiLongPress(locatePrompt: TUserPrompt, opt?: LocateOption & {
123
+ duration?: number;
124
+ }): Promise<any>;
125
+ aiClearInput(locatePrompt: TUserPrompt, opt?: LocateOption): Promise<any>;
126
+ aiAct(taskPrompt: string, opt?: AiActOptions): Promise<string | undefined>;
127
+ /**
128
+ * @deprecated Use {@link Agent.aiAct} instead.
129
+ */
130
+ aiAction(taskPrompt: string, opt?: AiActOptions): Promise<string | undefined>;
131
+ aiQuery<ReturnType = any>(demand: ServiceExtractParam, opt?: ServiceExtractOption): Promise<ReturnType>;
132
+ aiBoolean(prompt: TUserPrompt, opt?: ServiceExtractOption): Promise<boolean>;
133
+ aiNumber(prompt: TUserPrompt, opt?: ServiceExtractOption): Promise<number>;
134
+ aiString(prompt: TUserPrompt, opt?: ServiceExtractOption): Promise<string>;
135
+ aiAsk(prompt: TUserPrompt, opt?: ServiceExtractOption): Promise<string>;
136
+ describeElementAtPoint(center: [number, number], opt?: {
137
+ verifyPrompt?: boolean;
138
+ retryLimit?: number;
139
+ deepLocate?: boolean;
140
+ } & LocatorValidatorOption): Promise<AgentDescribeElementAtPointResult>;
141
+ verifyLocator(prompt: string, locateOpt: LocateOption | undefined, expectCenter: [number, number], verifyLocateOption?: LocatorValidatorOption): Promise<LocateValidatorResult>;
142
+ /**
143
+ * Locate an element and return both its center point and an approximate rect.
144
+ *
145
+ * - In most locate flows, `rect` represents the matched element boundary.
146
+ * - Some models only support point grounding instead of boundary grounding.
147
+ * In those cases (for example, AutoGLM), `rect` falls back to a small 8x8
148
+ * box centered on the located point.
149
+ *
150
+ * Because `rect` may vary with the underlying model capability, avoid relying
151
+ * on it too heavily for strict boundary semantics. If you need a stable click
152
+ * target, prefer `center`.
153
+ */
154
+ aiLocate(prompt: TUserPrompt, opt?: LocateOption): Promise<Pick<LocateResultElement, "rect" | "center">>;
155
+ aiAssert(assertion: TUserPrompt, msg?: string, opt?: AgentAssertOpt & ServiceExtractOption): Promise<{
156
+ pass: boolean;
157
+ thought: string | undefined;
158
+ message: string | undefined;
159
+ } | undefined>;
160
+ aiWaitFor(assertion: TUserPrompt, opt?: AgentWaitForOpt): Promise<void>;
161
+ ai(...args: Parameters<typeof this.aiAct>): Promise<string | undefined>;
162
+ runYaml(yamlScriptContent: string): Promise<{
163
+ result: Record<string, any>;
164
+ }>;
165
+ evaluateJavaScript(script: string): Promise<any>;
166
+ /**
167
+ * Add a dump update listener
168
+ * @param listener Listener function
169
+ * @returns A remove function that can be called to remove this listener
170
+ */
171
+ addDumpUpdateListener(listener: (dump: string, executionDump?: ExecutionDump) => void): () => void;
172
+ /**
173
+ * Remove a dump update listener
174
+ * @param listener The listener function to remove
175
+ */
176
+ removeDumpUpdateListener(listener: (dump: string, executionDump?: ExecutionDump) => void): void;
177
+ /**
178
+ * Clear all dump update listeners
179
+ */
180
+ clearDumpUpdateListeners(): void;
181
+ destroy(): Promise<void>;
182
+ recordToReport(title?: string, opt?: {
183
+ content: string;
184
+ }): Promise<void>;
185
+ /**
186
+ * @deprecated Use {@link Agent.recordToReport} instead.
187
+ */
188
+ logScreenshot(title?: string, opt?: {
189
+ content: string;
190
+ }): Promise<void>;
191
+ _unstableLogContent(): {
192
+ groupName: string;
193
+ groupDescription: string | undefined;
194
+ executions: ExecutionDump[];
195
+ };
196
+ /**
197
+ * Freezes the current page context to be reused in subsequent AI operations
198
+ * This avoids recalculating page context for each operation
199
+ */
200
+ freezePageContext(): Promise<void>;
201
+ /**
202
+ * Unfreezes the page context, allowing AI operations to calculate context dynamically
203
+ */
204
+ unfreezePageContext(): Promise<void>;
205
+ /**
206
+ * Process cache configuration and return normalized cache settings
207
+ */
208
+ private processCacheConfig;
209
+ private normalizeFilePaths;
210
+ private normalizeFileInput;
211
+ /**
212
+ * Manually flush cache to file
213
+ * @param options - Optional configuration
214
+ * @param options.cleanUnused - If true, removes unused cache records before flushing
215
+ */
216
+ flushCache(options?: {
217
+ cleanUnused?: boolean;
218
+ }): Promise<void>;
219
+ }
220
+ export declare const createAgent: (interfaceInstance: AbstractInterface, opts?: AgentOpt) => Agent<AbstractInterface>;
File without changes
@@ -0,0 +1,36 @@
1
+ import { type TaskExecutionError, TaskRunner } from '../task-runner';
2
+ import type { ExecutionTaskApply, ExecutionTaskProgressOptions, UIContext } from '../types';
3
+ type ExecutionSessionOptions = ExecutionTaskProgressOptions & {
4
+ tasks?: ExecutionTaskApply[];
5
+ onTaskUpdate?: (runner: TaskRunner, error?: TaskExecutionError) => Promise<void> | void;
6
+ };
7
+ /**
8
+ * Thin wrapper around {@link TaskRunner} that represents a single linear execution run.
9
+ */
10
+ export declare class ExecutionSession {
11
+ private readonly runner;
12
+ constructor(name: string, contextProvider: () => Promise<UIContext>, options?: ExecutionSessionOptions);
13
+ append(tasks: ExecutionTaskApply[] | ExecutionTaskApply, options?: {
14
+ allowWhenError?: boolean;
15
+ }): Promise<void>;
16
+ appendAndRun(tasks: ExecutionTaskApply[] | ExecutionTaskApply, options?: {
17
+ allowWhenError?: boolean;
18
+ }): Promise<{
19
+ output: any;
20
+ thought?: string;
21
+ } | undefined>;
22
+ run(options?: {
23
+ allowWhenError?: boolean;
24
+ }): Promise<{
25
+ output: any;
26
+ thought?: string;
27
+ } | undefined>;
28
+ isInErrorState(): boolean;
29
+ latestErrorTask(): import("../types").ExecutionTask | null;
30
+ appendErrorPlan(errorMsg: string): Promise<{
31
+ output: undefined;
32
+ runner: TaskRunner;
33
+ }>;
34
+ getRunner(): TaskRunner;
35
+ }
36
+ export {};
@@ -0,0 +1,9 @@
1
+ export { Agent, createAgent } from './agent';
2
+ export { commonContextParser } from './utils';
3
+ export { getReportFileName, printReportMsg, } from './utils';
4
+ export { extractInsightParam, locateParamStr, paramStr, taskTitleStr, typeStr, } from './ui-utils';
5
+ export { type LocateCache, type PlanningCache, TaskCache } from './task-cache';
6
+ export { cacheFileExt } from './task-cache';
7
+ export { TaskExecutor } from './tasks';
8
+ export type { AgentOpt } from '../types';
9
+ export type { AiActOptions } from './agent';
@@ -0,0 +1,34 @@
1
+ import type { AbstractInterface } from '../device';
2
+ import type Service from '../service';
3
+ import type { DetailedLocateParam, DeviceAction, ExecutionTaskApply, PlanningAction, PlanningLocateParam } from '../types';
4
+ import type { IModelConfig } from '@godscene/shared/env';
5
+ import type { TaskCache } from './task-cache';
6
+ export declare function locatePlanForLocate(param: string | DetailedLocateParam): PlanningAction<PlanningLocateParam>;
7
+ interface TaskBuilderDeps {
8
+ interfaceInstance: AbstractInterface;
9
+ service: Service;
10
+ taskCache?: TaskCache;
11
+ actionSpace: DeviceAction[];
12
+ waitAfterAction?: number;
13
+ }
14
+ interface BuildOptions {
15
+ cacheable?: boolean;
16
+ deepLocate?: boolean;
17
+ abortSignal?: AbortSignal;
18
+ }
19
+ export declare class TaskBuilder {
20
+ private readonly interface;
21
+ private readonly service;
22
+ private readonly taskCache?;
23
+ private readonly actionSpace;
24
+ private readonly waitAfterAction?;
25
+ constructor({ interfaceInstance, service, taskCache, actionSpace, waitAfterAction, }: TaskBuilderDeps);
26
+ build(plans: PlanningAction[], modelConfigForPlanning: IModelConfig, modelConfigForDefaultIntent: IModelConfig, options?: BuildOptions): Promise<{
27
+ tasks: ExecutionTaskApply[];
28
+ }>;
29
+ private handleFinishedPlan;
30
+ private handleLocatePlan;
31
+ private handleActionPlan;
32
+ private createLocateTask;
33
+ }
34
+ export {};
@@ -0,0 +1,49 @@
1
+ import type { TUserPrompt } from '../ai-model';
2
+ import type { ElementCacheFeature } from '../types';
3
+ export declare const debug: import("@godscene/shared/logger").DebugFunction;
4
+ export interface PlanningCache {
5
+ type: 'plan';
6
+ prompt: string;
7
+ yamlWorkflow: string;
8
+ }
9
+ export interface LocateCache {
10
+ type: 'locate';
11
+ prompt: TUserPrompt;
12
+ cache?: ElementCacheFeature;
13
+ /** @deprecated kept for backward compatibility */
14
+ xpaths?: string[];
15
+ }
16
+ export interface MatchCacheResult<T extends PlanningCache | LocateCache> {
17
+ cacheContent: T;
18
+ cacheUsable: boolean;
19
+ updateFn: (cb: (cache: T) => void) => void;
20
+ }
21
+ export type CacheFileContent = {
22
+ midsceneVersion: string;
23
+ cacheId: string;
24
+ caches: Array<PlanningCache | LocateCache>;
25
+ };
26
+ export declare const cacheFileExt = ".cache.yaml";
27
+ export declare class TaskCache {
28
+ cacheId: string;
29
+ cacheFilePath?: string;
30
+ cache: CacheFileContent;
31
+ isCacheResultUsed: boolean;
32
+ cacheOriginalLength: number;
33
+ readOnlyMode: boolean;
34
+ writeOnlyMode: boolean;
35
+ private matchedCacheIndices;
36
+ constructor(cacheId: string, isCacheResultUsed: boolean, cacheFilePath?: string, options?: {
37
+ readOnly?: boolean;
38
+ writeOnly?: boolean;
39
+ });
40
+ matchCache(prompt: TUserPrompt, type: 'plan' | 'locate'): MatchCacheResult<PlanningCache | LocateCache> | undefined;
41
+ matchPlanCache(prompt: string): MatchCacheResult<PlanningCache> | undefined;
42
+ matchLocateCache(prompt: TUserPrompt): MatchCacheResult<LocateCache> | undefined;
43
+ appendCache(cache: PlanningCache | LocateCache): void;
44
+ loadCacheFromFile(): CacheFileContent | undefined;
45
+ flushCacheToFile(options?: {
46
+ cleanUnused?: boolean;
47
+ }): void;
48
+ updateOrAppendCacheRecord(newRecord: PlanningCache | LocateCache, cachedRecord?: MatchCacheResult<PlanningCache | LocateCache>): void;
49
+ }
@@ -0,0 +1,70 @@
1
+ import { type TMultimodalPrompt, type TUserPrompt } from '../common';
2
+ import type { AbstractInterface } from '../device';
3
+ import type Service from '../service';
4
+ import type { TaskRunner } from '../task-runner';
5
+ import { TaskExecutionError } from '../task-runner';
6
+ import type { DeepThinkOption, DeviceAction, ExecutionTaskApply, ExecutionTaskProgressOptions, MidsceneYamlFlowItem, PlanningAction, PlanningActionParamWaitFor, ServiceExtractOption, ServiceExtractParam } from '../types';
7
+ import type { IModelConfig } from '@godscene/shared/env';
8
+ import type { TaskCache } from './task-cache';
9
+ export { locatePlanForLocate } from './task-builder';
10
+ interface ExecutionResult<OutputType = any> {
11
+ output: OutputType;
12
+ thought?: string;
13
+ runner: TaskRunner;
14
+ }
15
+ interface TaskExecutorHooks {
16
+ onTaskUpdate?: (runner: TaskRunner, error?: TaskExecutionError) => Promise<void> | void;
17
+ }
18
+ export { TaskExecutionError };
19
+ export declare class TaskExecutor {
20
+ interface: AbstractInterface;
21
+ service: Service;
22
+ taskCache?: TaskCache;
23
+ private readonly providedActionSpace;
24
+ private readonly taskBuilder;
25
+ onTaskStartCallback?: ExecutionTaskProgressOptions['onTaskStart'];
26
+ private readonly hooks?;
27
+ replanningCycleLimit?: number;
28
+ waitAfterAction?: number;
29
+ useDeviceTime?: boolean;
30
+ get page(): AbstractInterface;
31
+ constructor(interfaceInstance: AbstractInterface, service: Service, opts: {
32
+ taskCache?: TaskCache;
33
+ onTaskStart?: ExecutionTaskProgressOptions['onTaskStart'];
34
+ replanningCycleLimit?: number;
35
+ waitAfterAction?: number;
36
+ useDeviceTime?: boolean;
37
+ hooks?: TaskExecutorHooks;
38
+ actionSpace: DeviceAction[];
39
+ });
40
+ private createExecutionSession;
41
+ private getActionSpace;
42
+ /**
43
+ * Get a readable time string. When device time is enabled, use the
44
+ * device-formatted wall-clock time directly so host timezone formatting does
45
+ * not reinterpret a device timestamp.
46
+ * @param format - Optional format string
47
+ * @returns A formatted time string
48
+ */
49
+ private getTimeString;
50
+ convertPlanToExecutable(plans: PlanningAction[], modelConfigForPlanning: IModelConfig, modelConfigForDefaultIntent: IModelConfig, options?: {
51
+ cacheable?: boolean;
52
+ deepLocate?: boolean;
53
+ abortSignal?: AbortSignal;
54
+ }): Promise<{
55
+ tasks: ExecutionTaskApply[];
56
+ }>;
57
+ loadYamlFlowAsPlanning(userInstruction: string, yamlString: string): Promise<{
58
+ runner: TaskRunner;
59
+ }>;
60
+ runPlans(title: string, plans: PlanningAction[], modelConfigForPlanning: IModelConfig, modelConfigForDefaultIntent: IModelConfig): Promise<ExecutionResult>;
61
+ action(userPrompt: string, modelConfigForPlanning: IModelConfig, modelConfigForDefaultIntent: IModelConfig, includeBboxInPlanning: boolean, aiActContext?: string, cacheable?: boolean, replanningCycleLimitOverride?: number, imagesIncludeCount?: number, deepThink?: DeepThinkOption, fileChooserAccept?: string[], deepLocate?: boolean, abortSignal?: AbortSignal): Promise<ExecutionResult<{
62
+ yamlFlow?: MidsceneYamlFlowItem[];
63
+ output?: string;
64
+ } | undefined>>;
65
+ private runAction;
66
+ private createTypeQueryTask;
67
+ createTypeQueryExecution<T>(type: 'Query' | 'Boolean' | 'Number' | 'String' | 'Assert', demand: ServiceExtractParam, modelConfig: IModelConfig, opt?: ServiceExtractOption, multimodalPrompt?: TMultimodalPrompt): Promise<ExecutionResult<T>>;
68
+ waitFor(assertion: TUserPrompt, opt: PlanningActionParamWaitFor, modelConfig: IModelConfig): Promise<ExecutionResult<void>>;
69
+ }
70
+ export declare function withFileChooser<T>(interfaceInstance: AbstractInterface, fileChooserAccept: string[] | undefined, action: () => Promise<T>): Promise<T>;
@@ -0,0 +1,14 @@
1
+ import type { DetailedLocateParam, ExecutionTask, PullParam, ScrollParam } from '../types';
2
+ export declare function typeStr(task: ExecutionTask): any;
3
+ export declare function locateParamStr(locate?: DetailedLocateParam | string): string;
4
+ export declare function scrollParamStr(scrollParam?: ScrollParam): string;
5
+ export declare function pullParamStr(pullParam?: PullParam): string;
6
+ export declare function extractInsightParam(taskParam: any): {
7
+ content: string;
8
+ images?: Array<{
9
+ name: string;
10
+ url: string;
11
+ }>;
12
+ };
13
+ export declare function taskTitleStr(type: 'Tap' | 'Hover' | 'Input' | 'RightClick' | 'KeyboardPress' | 'Scroll' | 'Act' | 'Query' | 'Assert' | 'WaitFor' | 'Locate' | 'Boolean' | 'Number' | 'String', prompt: string): string;
14
+ export declare function paramStr(task: ExecutionTask): string;
@@ -0,0 +1,25 @@
1
+ import type { TMultimodalPrompt, TUserPrompt } from '../common';
2
+ import type { AbstractInterface } from '../device';
3
+ import type { ElementCacheFeature, LocateResultElement, PlanningLocateParam, Rect, UIContext } from '../types';
4
+ import type { TModelFamily } from '@godscene/shared/env';
5
+ import type { TaskCache } from './task-cache';
6
+ export declare function commonContextParser(interfaceInstance: AbstractInterface, _opt: {
7
+ uploadServerUrl?: string;
8
+ screenshotShrinkFactor?: number;
9
+ modelFamily?: TModelFamily;
10
+ }): Promise<UIContext>;
11
+ export declare function getReportFileName(tag?: string): string;
12
+ export declare function printReportMsg(filepath: string): void;
13
+ export declare function ifPlanLocateParamIsBbox(planLocateParam: PlanningLocateParam): boolean;
14
+ export declare function matchElementFromPlan(planLocateParam: PlanningLocateParam): LocateResultElement | undefined;
15
+ export declare function matchElementFromCache(context: {
16
+ taskCache?: TaskCache;
17
+ interfaceInstance: AbstractInterface;
18
+ }, cacheEntry: ElementCacheFeature | undefined, cachePrompt: TUserPrompt, cacheable: boolean | undefined): Promise<LocateResultElement | undefined>;
19
+ export declare const getMidsceneVersion: () => string;
20
+ export declare const parsePrompt: (prompt: TUserPrompt) => {
21
+ textPrompt: string;
22
+ multimodalPrompt?: TMultimodalPrompt;
23
+ };
24
+ export declare const transformLogicalElementToScreenshot: (element: LocateResultElement, shrunkShotToLogicalRatio: number) => LocateResultElement;
25
+ export declare const transformLogicalRectToScreenshotRect: (rect: Rect, shrunkShotToLogicalRatio: number) => Rect;
@@ -0,0 +1,78 @@
1
+ import type { DeviceAction } from '../../device';
2
+ import type { PlanningAction } from '../../types';
3
+ export interface BaseAction { // Base shape extended by every action interface in this declaration file.
4
+ _metadata: string; // Narrowed to the literal 'do' or 'finish' by the extending interfaces.
5
+ think?: string; // Optional string; presumably the model's reasoning text — TODO confirm.
6
+ }
7
+ export interface TapAction extends BaseAction { // Action variant tagged by action: 'Tap'.
8
+ _metadata: 'do';
9
+ action: 'Tap'; // Literal discriminant.
10
+ element: [number, number]; // Two-number tuple; presumably an (x, y) point — coordinate space is not stated in this declaration.
11
+ }
12
+ export interface DoubleTapAction extends BaseAction { // Action variant tagged by action: 'Double Tap'.
13
+ _metadata: 'do';
14
+ action: 'Double Tap'; // Literal discriminant; note the space inside the string.
15
+ element: [number, number]; // Two-number tuple; presumably an (x, y) point — coordinate space is not stated in this declaration.
16
+ }
17
+ export interface TypeAction extends BaseAction { // Action variant tagged by action: 'Type'.
18
+ _metadata: 'do';
19
+ action: 'Type'; // Literal discriminant.
20
+ text: string; // Required string payload; presumably the text to enter — TODO confirm.
21
+ }
22
+ export interface SwipeAction extends BaseAction { // Action variant tagged by action: 'Swipe'.
23
+ _metadata: 'do';
24
+ action: 'Swipe'; // Literal discriminant.
25
+ start: [number, number]; // Two-number tuple; presumably the (x, y) start point — coordinate space not stated here.
26
+ end: [number, number]; // Two-number tuple; presumably the (x, y) end point — coordinate space not stated here.
27
+ }
28
+ export interface LongPressAction extends BaseAction { // Action variant tagged by action: 'Long Press'.
29
+ _metadata: 'do';
30
+ action: 'Long Press'; // Literal discriminant; note the space inside the string.
31
+ element: [number, number]; // Two-number tuple; presumably an (x, y) point — coordinate space is not stated in this declaration.
32
+ }
33
+ export interface LaunchAction extends BaseAction { // Action variant tagged by action: 'Launch'.
34
+ _metadata: 'do';
35
+ action: 'Launch'; // Literal discriminant.
36
+ app: string; // Required string; whether this is an app name or a package id is not visible here.
37
+ }
38
+ export interface BackAction extends BaseAction { // Action variant tagged by action: 'Back'; declares no payload fields of its own.
39
+ _metadata: 'do';
40
+ action: 'Back'; // Literal discriminant.
41
+ }
42
+ export interface HomeAction extends BaseAction { // Action variant tagged by action: 'Home'; declares no payload fields of its own.
43
+ _metadata: 'do';
44
+ action: 'Home'; // Literal discriminant.
45
+ }
46
+ export interface WaitAction extends BaseAction { // Action variant tagged by action: 'Wait'.
47
+ _metadata: 'do';
48
+ action: 'Wait'; // Literal discriminant.
49
+ durationMs: number; // Required number; the name indicates a duration in milliseconds.
50
+ }
51
+ export interface InteractAction extends BaseAction { // Action variant tagged by action: 'Interact'; declares no payload fields of its own.
52
+ _metadata: 'do';
53
+ action: 'Interact'; // Literal discriminant.
54
+ }
55
+ export interface CallAPIAction extends BaseAction { // Action variant tagged by action: 'Call_API'.
56
+ _metadata: 'do';
57
+ action: 'Call_API'; // Literal discriminant; note the underscore in the string.
58
+ instruction: string; // Required string payload; its expected format is not visible in this declaration.
59
+ }
60
+ export interface TakeoverAction extends BaseAction { // Action variant tagged by action: 'Take_over'.
61
+ _metadata: 'do';
62
+ action: 'Take_over'; // Literal discriminant; note the underscore and lowercase 'over'.
63
+ message: string; // Required string payload.
64
+ }
65
+ export interface NoteAction extends BaseAction { // Action variant tagged by action: 'Note'.
66
+ _metadata: 'do';
67
+ action: 'Note'; // Literal discriminant.
68
+ message: string; // Required string payload.
69
+ }
70
+ export interface FinishAction extends BaseAction { // The only action shape here whose _metadata is 'finish'; it declares no `action` field.
71
+ _metadata: 'finish'; // Literal tag distinguishing this shape from the 'do' variants.
72
+ message: string; // Required string payload.
73
+ }
74
+ export type ParsedAction = TapAction | DoubleTapAction | TypeAction | SwipeAction | LongPressAction | LaunchAction | BackAction | HomeAction | WaitAction | InteractAction | CallAPIAction | TakeoverAction | NoteAction | FinishAction; // Union of the 14 action shapes declared in this file; FinishAction has no `action` field, so narrow on `_metadata` before reading `action`.
75
+ export declare function transformAutoGLMAction(action: ParsedAction, size: { // Maps one ParsedAction to an array of PlanningAction; the mapping rules are not visible in this declaration.
76
+ width: number; // Required; presumably the screen or screenshot width used for coordinate conversion — TODO confirm.
77
+ height: number; // Required; presumably the matching height — TODO confirm.
78
+ }, actionSpace?: DeviceAction[]): PlanningAction[]; // actionSpace is optional; behavior when omitted is not visible here.
@@ -0,0 +1,6 @@
1
+ export { getAutoGLMLocatePrompt, getAutoGLMPlanPrompt } from './prompt';
2
+ export { parseAutoGLMLocateResponse, parseAutoGLMResponse, parseAction, } from './parser';
3
+ export { autoGLMPlanning } from './planning';
4
+ export { transformAutoGLMAction } from './actions';
5
+ export { isAutoGLM, isUITars } from './util';
6
+ export type { ParsedAction } from './actions';
@@ -0,0 +1,18 @@
1
+ import type { ParsedAction } from './actions';
2
+ export declare const extractValueAfter: (src: string, key: string) => string; // Takes two strings and returns a string; presumably the text following `key` in `src` — behavior when `key` is absent is not visible here.
3
+ export declare function parseAction(response: { // Takes a { think, content } object and returns a ParsedAction; error behavior on unparseable content is not visible here.
4
+ think: string; // Required.
5
+ content: string; // Required.
6
+ }): ParsedAction;
7
+ export declare function parseAutoGLMResponse(content: string): { // Takes a raw string and returns a { think, content } object, the same shape parseAction accepts as its parameter.
8
+ think: string; // Always present in the declared return type.
9
+ content: string; // Always present in the declared return type.
10
+ };
11
+ export declare function parseAutoGLMLocateResponse(rawResponse: string): { // Takes a raw response string and returns the object shape declared here.
12
+ think: string; // Always present in the declared return type.
13
+ coordinates: { // Either an { x, y } object or null; callers must null-check before use.
14
+ x: number;
15
+ y: number;
16
+ } | null;
17
+ error?: string; // Optional; presumably set when coordinates is null — TODO confirm against the implementation.
18
+ };
@@ -0,0 +1,12 @@
1
+ import type { DeviceAction } from '../../device';
2
+ import type { PlanningAIResponse, UIContext } from '../../types';
3
+ import type { IModelConfig } from '@godscene/shared/env';
4
+ import type { ConversationHistory } from '../conversation-history';
5
+ export declare function autoGLMPlanning(userInstruction: string, options: { // Async: takes an instruction string plus options and resolves to a PlanningAIResponse.
6
+ conversationHistory: ConversationHistory; // Required; whether the call mutates it is not visible in this declaration.
7
+ context: UIContext; // Required.
8
+ modelConfig: IModelConfig; // Required.
9
+ actionContext?: string; // Optional.
10
+ actionSpace?: DeviceAction[]; // Optional.
11
+ abortSignal?: AbortSignal; // Optional; presumably used to cancel the request — TODO confirm.
12
+ }): Promise<PlanningAIResponse>;
@@ -0,0 +1,27 @@
1
+ /**
2
+ * Auto-GLM Prompt Templates
3
+ *
4
+ * Portions of this file are derived from Open-AutoGLM
5
+ * Copyright (c) 2024 zai-org
6
+ * Licensed under the Apache License, Version 2.0
7
+ *
8
+ * Source: https://github.com/zai-org/Open-AutoGLM
9
+ *
10
+ * Licensed under the Apache License, Version 2.0 (the "License");
11
+ * you may not use this file except in compliance with the License.
12
+ * You may obtain a copy of the License at
13
+ *
14
+ * http://www.apache.org/licenses/LICENSE-2.0
15
+ *
16
+ * Unless required by applicable law or agreed to in writing, software
17
+ * distributed under the License is distributed on an "AS IS" BASIS,
18
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
19
+ * See the License for the specific language governing permissions and
20
+ * limitations under the License.
21
+ *
22
+ * Modifications:
23
+ * - Adapted prompts for RPA智能助手 integration
24
+ */
25
+ import type { TModelFamily } from '@godscene/shared/env';
26
+ export declare const getAutoGLMPlanPrompt: (modelFamily: TModelFamily | undefined) => string; // Returns a string for the given model family; the argument is required but may be undefined (it is not declared optional).
27
+ export declare const getAutoGLMLocatePrompt: (modelFamily: TModelFamily | undefined) => string; // Returns a string for the given model family; the argument is required but may be undefined (it is not declared optional).
@@ -0,0 +1,13 @@
1
+ import type { TModelFamily } from '@godscene/shared/env';
2
+ /**
3
+ * Check if the modelFamily is auto-glm or auto-glm-multilingual
4
+ * @param modelFamily The model family to check (the signature also accepts undefined)
5
+ * @returns true if modelFamily is auto-glm or auto-glm-multilingual
6
+ */
7
+ export declare function isAutoGLM(modelFamily: TModelFamily | undefined): boolean;
8
+ /**
9
+ * Check if the modelFamily is a UI-TARS variant
10
+ * @param modelFamily The model family to check (the signature also accepts undefined)
11
+ * @returns true if modelFamily is any UI-TARS variant
12
+ */
13
+ export declare function isUITars(modelFamily: TModelFamily | undefined): boolean;
@@ -0,0 +1,20 @@
1
+ import type { IModelConfig, TIntent } from '@godscene/shared/env';
2
+ export interface ConnectivityCheckResultItem { // Shape of one entry in ConnectivityTestResult.checks.
3
+ name: 'text' | 'vision' | 'aiLocate'; // One of exactly three literal check names.
4
+ intent: TIntent; // Type imported from @godscene/shared/env.
5
+ modelName: string; // Required.
6
+ modelFamily?: string; // Optional; typed as plain string here rather than TModelFamily.
7
+ passed: boolean; // Required pass/fail flag for this check.
8
+ durationMs: number; // Required number; the name indicates a duration in milliseconds.
9
+ message: string; // Required; content conventions are not visible in this declaration.
10
+ }
11
+ export interface ConnectivityTestResult { // Shape that runConnectivityTest resolves to.
12
+ passed: boolean; // Required; presumably the aggregate of the individual checks — TODO confirm.
13
+ checks: ConnectivityCheckResultItem[]; // Required array of per-check results.
14
+ }
15
+ export interface ConnectivityTestConfig { // Input for runConnectivityTest: three required model configurations.
16
+ defaultModelConfig: IModelConfig; // Required.
17
+ planningModelConfig: IModelConfig; // Required.
18
+ insightModelConfig: IModelConfig; // Required.
19
+ }
20
+ export declare function runConnectivityTest(config: ConnectivityTestConfig): Promise<ConnectivityTestResult>; // Async: takes a ConnectivityTestConfig and resolves to a ConnectivityTestResult; whether failures reject or are reported via `passed` is not visible here.