@godscene/core 1.7.11

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (189) hide show
  1. package/LICENSE +21 -0
  2. package/README.md +9 -0
  3. package/dist/es/agent/agent.mjs +767 -0
  4. package/dist/es/agent/common.mjs +0 -0
  5. package/dist/es/agent/execution-session.mjs +39 -0
  6. package/dist/es/agent/index.mjs +6 -0
  7. package/dist/es/agent/task-builder.mjs +343 -0
  8. package/dist/es/agent/task-cache.mjs +212 -0
  9. package/dist/es/agent/tasks.mjs +428 -0
  10. package/dist/es/agent/ui-utils.mjs +101 -0
  11. package/dist/es/agent/utils.mjs +167 -0
  12. package/dist/es/ai-model/auto-glm/actions.mjs +237 -0
  13. package/dist/es/ai-model/auto-glm/index.mjs +6 -0
  14. package/dist/es/ai-model/auto-glm/parser.mjs +237 -0
  15. package/dist/es/ai-model/auto-glm/planning.mjs +69 -0
  16. package/dist/es/ai-model/auto-glm/prompt.mjs +220 -0
  17. package/dist/es/ai-model/auto-glm/util.mjs +7 -0
  18. package/dist/es/ai-model/connectivity.mjs +136 -0
  19. package/dist/es/ai-model/conversation-history.mjs +193 -0
  20. package/dist/es/ai-model/index.mjs +12 -0
  21. package/dist/es/ai-model/inspect.mjs +395 -0
  22. package/dist/es/ai-model/llm-planning.mjs +231 -0
  23. package/dist/es/ai-model/prompt/common.mjs +5 -0
  24. package/dist/es/ai-model/prompt/describe.mjs +64 -0
  25. package/dist/es/ai-model/prompt/extraction.mjs +129 -0
  26. package/dist/es/ai-model/prompt/llm-locator.mjs +49 -0
  27. package/dist/es/ai-model/prompt/llm-planning.mjs +584 -0
  28. package/dist/es/ai-model/prompt/llm-section-locator.mjs +42 -0
  29. package/dist/es/ai-model/prompt/order-sensitive-judge.mjs +33 -0
  30. package/dist/es/ai-model/prompt/playwright-generator.mjs +115 -0
  31. package/dist/es/ai-model/prompt/ui-tars-planning.mjs +34 -0
  32. package/dist/es/ai-model/prompt/util.mjs +57 -0
  33. package/dist/es/ai-model/prompt/yaml-generator.mjs +201 -0
  34. package/dist/es/ai-model/service-caller/codex-app-server.mjs +573 -0
  35. package/dist/es/ai-model/service-caller/image-detail.mjs +4 -0
  36. package/dist/es/ai-model/service-caller/index.mjs +648 -0
  37. package/dist/es/ai-model/service-caller/request-timeout.mjs +47 -0
  38. package/dist/es/ai-model/ui-tars-planning.mjs +247 -0
  39. package/dist/es/common.mjs +382 -0
  40. package/dist/es/device/device-options.mjs +0 -0
  41. package/dist/es/device/index.mjs +340 -0
  42. package/dist/es/dump/html-utils.mjs +290 -0
  43. package/dist/es/dump/index.mjs +3 -0
  44. package/dist/es/dump/screenshot-restoration.mjs +30 -0
  45. package/dist/es/dump/screenshot-store.mjs +125 -0
  46. package/dist/es/index.mjs +17 -0
  47. package/dist/es/report-cli.mjs +149 -0
  48. package/dist/es/report-generator.mjs +203 -0
  49. package/dist/es/report-markdown.mjs +216 -0
  50. package/dist/es/report.mjs +287 -0
  51. package/dist/es/screenshot-item.mjs +120 -0
  52. package/dist/es/service/index.mjs +272 -0
  53. package/dist/es/service/utils.mjs +13 -0
  54. package/dist/es/skill/index.mjs +35 -0
  55. package/dist/es/task-runner.mjs +261 -0
  56. package/dist/es/task-timing.mjs +10 -0
  57. package/dist/es/tree.mjs +11 -0
  58. package/dist/es/types.mjs +202 -0
  59. package/dist/es/utils.mjs +232 -0
  60. package/dist/es/yaml/builder.mjs +11 -0
  61. package/dist/es/yaml/index.mjs +4 -0
  62. package/dist/es/yaml/player.mjs +425 -0
  63. package/dist/es/yaml/utils.mjs +100 -0
  64. package/dist/es/yaml.mjs +0 -0
  65. package/dist/lib/agent/agent.js +815 -0
  66. package/dist/lib/agent/common.js +5 -0
  67. package/dist/lib/agent/execution-session.js +73 -0
  68. package/dist/lib/agent/index.js +76 -0
  69. package/dist/lib/agent/task-builder.js +380 -0
  70. package/dist/lib/agent/task-cache.js +264 -0
  71. package/dist/lib/agent/tasks.js +471 -0
  72. package/dist/lib/agent/ui-utils.js +153 -0
  73. package/dist/lib/agent/utils.js +238 -0
  74. package/dist/lib/ai-model/auto-glm/actions.js +271 -0
  75. package/dist/lib/ai-model/auto-glm/index.js +64 -0
  76. package/dist/lib/ai-model/auto-glm/parser.js +280 -0
  77. package/dist/lib/ai-model/auto-glm/planning.js +103 -0
  78. package/dist/lib/ai-model/auto-glm/prompt.js +257 -0
  79. package/dist/lib/ai-model/auto-glm/util.js +44 -0
  80. package/dist/lib/ai-model/connectivity.js +180 -0
  81. package/dist/lib/ai-model/conversation-history.js +227 -0
  82. package/dist/lib/ai-model/index.js +127 -0
  83. package/dist/lib/ai-model/inspect.js +441 -0
  84. package/dist/lib/ai-model/llm-planning.js +268 -0
  85. package/dist/lib/ai-model/prompt/common.js +39 -0
  86. package/dist/lib/ai-model/prompt/describe.js +98 -0
  87. package/dist/lib/ai-model/prompt/extraction.js +169 -0
  88. package/dist/lib/ai-model/prompt/llm-locator.js +86 -0
  89. package/dist/lib/ai-model/prompt/llm-planning.js +621 -0
  90. package/dist/lib/ai-model/prompt/llm-section-locator.js +79 -0
  91. package/dist/lib/ai-model/prompt/order-sensitive-judge.js +70 -0
  92. package/dist/lib/ai-model/prompt/playwright-generator.js +176 -0
  93. package/dist/lib/ai-model/prompt/ui-tars-planning.js +71 -0
  94. package/dist/lib/ai-model/prompt/util.js +103 -0
  95. package/dist/lib/ai-model/prompt/yaml-generator.js +262 -0
  96. package/dist/lib/ai-model/service-caller/codex-app-server.js +622 -0
  97. package/dist/lib/ai-model/service-caller/image-detail.js +38 -0
  98. package/dist/lib/ai-model/service-caller/index.js +716 -0
  99. package/dist/lib/ai-model/service-caller/request-timeout.js +93 -0
  100. package/dist/lib/ai-model/ui-tars-planning.js +281 -0
  101. package/dist/lib/common.js +491 -0
  102. package/dist/lib/device/device-options.js +18 -0
  103. package/dist/lib/device/index.js +467 -0
  104. package/dist/lib/dump/html-utils.js +366 -0
  105. package/dist/lib/dump/index.js +58 -0
  106. package/dist/lib/dump/screenshot-restoration.js +64 -0
  107. package/dist/lib/dump/screenshot-store.js +165 -0
  108. package/dist/lib/index.js +184 -0
  109. package/dist/lib/report-cli.js +189 -0
  110. package/dist/lib/report-generator.js +244 -0
  111. package/dist/lib/report-markdown.js +253 -0
  112. package/dist/lib/report.js +333 -0
  113. package/dist/lib/screenshot-item.js +154 -0
  114. package/dist/lib/service/index.js +306 -0
  115. package/dist/lib/service/utils.js +47 -0
  116. package/dist/lib/skill/index.js +69 -0
  117. package/dist/lib/task-runner.js +298 -0
  118. package/dist/lib/task-timing.js +44 -0
  119. package/dist/lib/tree.js +51 -0
  120. package/dist/lib/types.js +298 -0
  121. package/dist/lib/utils.js +314 -0
  122. package/dist/lib/yaml/builder.js +55 -0
  123. package/dist/lib/yaml/index.js +79 -0
  124. package/dist/lib/yaml/player.js +459 -0
  125. package/dist/lib/yaml/utils.js +153 -0
  126. package/dist/lib/yaml.js +18 -0
  127. package/dist/types/agent/agent.d.ts +220 -0
  128. package/dist/types/agent/common.d.ts +0 -0
  129. package/dist/types/agent/execution-session.d.ts +36 -0
  130. package/dist/types/agent/index.d.ts +9 -0
  131. package/dist/types/agent/task-builder.d.ts +34 -0
  132. package/dist/types/agent/task-cache.d.ts +49 -0
  133. package/dist/types/agent/tasks.d.ts +70 -0
  134. package/dist/types/agent/ui-utils.d.ts +14 -0
  135. package/dist/types/agent/utils.d.ts +25 -0
  136. package/dist/types/ai-model/auto-glm/actions.d.ts +78 -0
  137. package/dist/types/ai-model/auto-glm/index.d.ts +6 -0
  138. package/dist/types/ai-model/auto-glm/parser.d.ts +18 -0
  139. package/dist/types/ai-model/auto-glm/planning.d.ts +12 -0
  140. package/dist/types/ai-model/auto-glm/prompt.d.ts +27 -0
  141. package/dist/types/ai-model/auto-glm/util.d.ts +13 -0
  142. package/dist/types/ai-model/connectivity.d.ts +20 -0
  143. package/dist/types/ai-model/conversation-history.d.ts +105 -0
  144. package/dist/types/ai-model/index.d.ts +16 -0
  145. package/dist/types/ai-model/inspect.d.ts +67 -0
  146. package/dist/types/ai-model/llm-planning.d.ts +19 -0
  147. package/dist/types/ai-model/prompt/common.d.ts +2 -0
  148. package/dist/types/ai-model/prompt/describe.d.ts +1 -0
  149. package/dist/types/ai-model/prompt/extraction.d.ts +7 -0
  150. package/dist/types/ai-model/prompt/llm-locator.d.ts +3 -0
  151. package/dist/types/ai-model/prompt/llm-planning.d.ts +10 -0
  152. package/dist/types/ai-model/prompt/llm-section-locator.d.ts +3 -0
  153. package/dist/types/ai-model/prompt/order-sensitive-judge.d.ts +2 -0
  154. package/dist/types/ai-model/prompt/playwright-generator.d.ts +26 -0
  155. package/dist/types/ai-model/prompt/ui-tars-planning.d.ts +2 -0
  156. package/dist/types/ai-model/prompt/util.d.ts +33 -0
  157. package/dist/types/ai-model/prompt/yaml-generator.d.ts +102 -0
  158. package/dist/types/ai-model/service-caller/codex-app-server.d.ts +42 -0
  159. package/dist/types/ai-model/service-caller/image-detail.d.ts +2 -0
  160. package/dist/types/ai-model/service-caller/index.d.ts +60 -0
  161. package/dist/types/ai-model/service-caller/request-timeout.d.ts +32 -0
  162. package/dist/types/ai-model/ui-tars-planning.d.ts +72 -0
  163. package/dist/types/common.d.ts +288 -0
  164. package/dist/types/device/device-options.d.ts +155 -0
  165. package/dist/types/device/index.d.ts +2565 -0
  166. package/dist/types/dump/html-utils.d.ts +75 -0
  167. package/dist/types/dump/index.d.ts +5 -0
  168. package/dist/types/dump/screenshot-restoration.d.ts +8 -0
  169. package/dist/types/dump/screenshot-store.d.ts +49 -0
  170. package/dist/types/index.d.ts +21 -0
  171. package/dist/types/report-cli.d.ts +36 -0
  172. package/dist/types/report-generator.d.ts +88 -0
  173. package/dist/types/report-markdown.d.ts +24 -0
  174. package/dist/types/report.d.ts +52 -0
  175. package/dist/types/screenshot-item.d.ts +67 -0
  176. package/dist/types/service/index.d.ts +24 -0
  177. package/dist/types/service/utils.d.ts +2 -0
  178. package/dist/types/skill/index.d.ts +25 -0
  179. package/dist/types/task-runner.d.ts +50 -0
  180. package/dist/types/task-timing.d.ts +8 -0
  181. package/dist/types/tree.d.ts +4 -0
  182. package/dist/types/types.d.ts +684 -0
  183. package/dist/types/utils.d.ts +45 -0
  184. package/dist/types/yaml/builder.d.ts +2 -0
  185. package/dist/types/yaml/index.d.ts +4 -0
  186. package/dist/types/yaml/player.d.ts +34 -0
  187. package/dist/types/yaml/utils.d.ts +9 -0
  188. package/dist/types/yaml.d.ts +215 -0
  189. package/package.json +130 -0
@@ -0,0 +1,105 @@
1
+ import type { SubGoal } from '../types';
2
+ import type { ChatCompletionMessageParam } from 'openai/resources/index';
3
+ export interface ConversationHistoryOptions {
4
+ initialMessages?: ChatCompletionMessageParam[];
5
+ }
6
+ export declare class ConversationHistory {
7
+ private readonly messages;
8
+ private subGoals;
9
+ private memories;
10
+ private historicalLogs;
11
+ pendingFeedbackMessage: string;
12
+ constructor(options?: ConversationHistoryOptions);
13
+ resetPendingFeedbackMessageIfExists(): void;
14
+ append(message: ChatCompletionMessageParam): void;
15
+ seed(messages: ChatCompletionMessageParam[]): void;
16
+ reset(): void;
17
+ /**
18
+ * Snapshot the conversation history, and replace the images with text if the number of images exceeds the limit.
19
+ * @param maxImages - The maximum number of images to include in the snapshot. Undefined means no limit.
20
+ * @returns The snapshot of the conversation history.
21
+ */
22
+ snapshot(maxImages?: number): ChatCompletionMessageParam[];
23
+ get length(): number;
24
+ [Symbol.iterator](): IterableIterator<ChatCompletionMessageParam>;
25
+ toJSON(): ChatCompletionMessageParam[];
26
+ /**
27
+ * Set all sub-goals, replacing any existing ones.
28
+ * Automatically marks the first pending goal as running.
29
+ */
30
+ setSubGoals(subGoals: SubGoal[]): void;
31
+ /**
32
+ * Merge sub-goals from update-plan-content.
33
+ * Preserves existing descriptions when incoming description is empty.
34
+ *
35
+ * This handles compact XML updates like:
36
+ * <sub-goal index="1" status="finished" />
37
+ */
38
+ mergeSubGoals(subGoals: SubGoal[]): void;
39
+ /**
40
+ * Update a single sub-goal by index.
41
+ * Clears logs if status or description actually changes.
42
+ * @returns true if the sub-goal was found and updated, false otherwise
43
+ */
44
+ updateSubGoal(index: number, updates: Partial<Omit<SubGoal, 'index'>>): boolean;
45
+ /**
46
+ * Mark the first pending sub-goal as running.
47
+ * Clears logs since status changes.
48
+ */
49
+ markFirstPendingAsRunning(): void;
50
+ /**
51
+ * Mark a sub-goal as finished.
52
+ * Automatically marks the next pending goal as running.
53
+ * @returns true if the sub-goal was found and updated, false otherwise
54
+ */
55
+ markSubGoalFinished(index: number): boolean;
56
+ /**
57
+ * Mark all sub-goals as finished.
58
+ * Clears logs for any goal whose status actually changes.
59
+ */
60
+ markAllSubGoalsFinished(): void;
61
+ /**
62
+ * Append a log entry to the currently running sub-goal.
63
+ * The log describes an action performed while working on the sub-goal.
64
+ */
65
+ appendSubGoalLog(log: string): void;
66
+ /**
67
+ * Convert sub-goals to text representation.
68
+ * Includes actions performed (logs) for the current sub-goal.
69
+ */
70
+ subGoalsToText(): string;
71
+ /**
72
+ * Append a log entry to the historical logs list.
73
+ * Used in non-deepThink mode to track executed steps across planning rounds.
74
+ */
75
+ appendHistoricalLog(log: string): void;
76
+ /**
77
+ * Convert historical logs to text representation.
78
+ * Provides context about previously executed steps to the model.
79
+ */
80
+ historicalLogsToText(): string;
81
+ /**
82
+ * Append a memory to the memories list
83
+ */
84
+ appendMemory(memory: string): void;
85
+ /**
86
+ * Get all memories
87
+ */
88
+ getMemories(): string[];
89
+ /**
90
+ * Convert memories to text representation
91
+ */
92
+ memoriesToText(): string;
93
+ /**
94
+ * Clear all memories
95
+ */
96
+ clearMemories(): void;
97
+ /**
98
+ * Compress the conversation history if it exceeds the threshold.
99
+ * Removes the oldest messages and replaces them with a single placeholder message.
100
+ * @param threshold - The number of messages that triggers compression.
101
+ * @param keepCount - The number of recent messages to keep after compression.
102
+ * @returns true if compression was performed, false otherwise.
103
+ */
104
+ compressHistory(threshold: number, keepCount: number): boolean;
105
+ }
@@ -0,0 +1,16 @@
1
+ export { AIResponseParseError, callAIWithStringResponse, callAIWithObjectResponse, callAI, } from './service-caller/index';
2
+ export { runConnectivityTest, type ConnectivityCheckResultItem, type ConnectivityTestConfig, type ConnectivityTestResult, } from './connectivity';
3
+ export { systemPromptToLocateElement } from './prompt/llm-locator';
4
+ export { generatePlaywrightTest, generatePlaywrightTestStream, } from './prompt/playwright-generator';
5
+ export { generateYamlTest, generateYamlTestStream, } from './prompt/yaml-generator';
6
+ export type { YamlGenerationOptions } from './prompt/yaml-generator';
7
+ export type { ChatCompletionMessageParam } from 'openai/resources/index';
8
+ export { AiLocateElement, AiExtractElementInfo, AiLocateSection, AiJudgeOrderSensitive, } from './inspect';
9
+ export { plan } from './llm-planning';
10
+ export { autoGLMPlanning } from './auto-glm/planning';
11
+ export { adaptBboxToRect } from '../common';
12
+ export { uiTarsPlanning } from './ui-tars-planning';
13
+ export { ConversationHistory, type ConversationHistoryOptions, } from './conversation-history';
14
+ export type { SubGoal, SubGoalStatus } from '../types';
15
+ export type { AIArgs } from '../common';
16
+ export { getMidsceneLocationSchema, PointSchema, SizeSchema, RectSchema, TMultimodalPromptSchema, TUserPromptSchema, type TMultimodalPrompt, type TUserPrompt, findAllMidsceneLocatorField, dumpActionParam, parseActionParam, } from '../common';
@@ -0,0 +1,67 @@
1
+ import type { AIDataExtractionResponse, AIUsageInfo, Rect, ServiceExtractOption, UIContext } from '../types';
2
+ import type { IModelConfig } from '@godscene/shared/env';
3
+ import type { LocateResultElement } from '@godscene/shared/types';
4
+ import type { ChatCompletionSystemMessageParam, ChatCompletionUserMessageParam } from 'openai/resources/index';
5
+ import type { TMultimodalPrompt, TUserPrompt } from '../common';
6
+ import { callAIWithObjectResponse } from './service-caller/index';
7
+ export type AIArgs = [
8
+ ChatCompletionSystemMessageParam,
9
+ ...ChatCompletionUserMessageParam[]
10
+ ];
11
+ export declare function buildSearchAreaConfig(options: {
12
+ context: UIContext;
13
+ baseRect: Rect;
14
+ modelFamily: IModelConfig['modelFamily'];
15
+ }): Promise<{
16
+ rect: Rect;
17
+ imageBase64: string;
18
+ scale: number;
19
+ }>;
20
+ export declare function AiLocateElement(options: {
21
+ context: UIContext;
22
+ targetElementDescription: TUserPrompt;
23
+ searchConfig?: Awaited<ReturnType<typeof AiLocateSection>>;
24
+ modelConfig: IModelConfig;
25
+ abortSignal?: AbortSignal;
26
+ }): Promise<{
27
+ parseResult: {
28
+ elements: LocateResultElement[];
29
+ errors?: string[];
30
+ };
31
+ rect?: Rect;
32
+ rawResponse: string;
33
+ usage?: AIUsageInfo;
34
+ reasoning_content?: string;
35
+ }>;
36
+ export declare function AiLocateSection(options: {
37
+ context: UIContext;
38
+ sectionDescription: TUserPrompt;
39
+ modelConfig: IModelConfig;
40
+ abortSignal?: AbortSignal;
41
+ }): Promise<{
42
+ rect?: Rect;
43
+ imageBase64?: string;
44
+ scale?: number;
45
+ error?: string;
46
+ rawResponse: string;
47
+ usage?: AIUsageInfo;
48
+ }>;
49
+ export declare function AiExtractElementInfo<T>(options: {
50
+ dataQuery: string | Record<string, string>;
51
+ multimodalPrompt?: TMultimodalPrompt;
52
+ context: UIContext;
53
+ pageDescription?: string;
54
+ extractOption?: ServiceExtractOption;
55
+ modelConfig: IModelConfig;
56
+ }): Promise<{
57
+ parseResult: AIDataExtractionResponse<T>;
58
+ rawResponse: string;
59
+ usage: AIUsageInfo | undefined;
60
+ reasoning_content: string | undefined;
61
+ }>;
62
+ export declare function AiJudgeOrderSensitive(description: string, callAIFn: typeof callAIWithObjectResponse<{
63
+ isOrderSensitive: boolean;
64
+ }>, modelConfig: IModelConfig): Promise<{
65
+ isOrderSensitive: boolean;
66
+ usage?: AIUsageInfo;
67
+ }>;
@@ -0,0 +1,19 @@
1
+ import type { DeepThinkOption, DeviceAction, InterfaceType, PlanningAIResponse, RawResponsePlanningAIResponse, UIContext } from '../types';
2
+ import type { IModelConfig, TModelFamily } from '@godscene/shared/env';
3
+ import type { ConversationHistory } from './conversation-history';
4
+ /**
5
+ * Parse XML response from LLM and convert to RawResponsePlanningAIResponse
6
+ */
7
+ export declare function parseXMLPlanningResponse(xmlString: string, modelFamily: TModelFamily | undefined): RawResponsePlanningAIResponse;
8
+ export declare function plan(userInstruction: string, opts: {
9
+ context: UIContext;
10
+ interfaceType: InterfaceType;
11
+ actionSpace: DeviceAction<any>[];
12
+ actionContext?: string;
13
+ modelConfig: IModelConfig;
14
+ conversationHistory: ConversationHistory;
15
+ includeBbox: boolean;
16
+ imagesIncludeCount?: number;
17
+ deepThink?: DeepThinkOption;
18
+ abortSignal?: AbortSignal;
19
+ }): Promise<PlanningAIResponse>;
@@ -0,0 +1,2 @@
1
+ import type { TModelFamily } from '@godscene/shared/env';
2
+ export declare function bboxDescription(modelFamily: TModelFamily | undefined): "box_2d bounding box for the target element, should be [ymin, xmin, ymax, xmax] normalized to 0-1000." | "2d bounding box as [xmin, ymin, xmax, ymax]";
@@ -0,0 +1 @@
1
+ export declare const elementDescriberInstruction: () => string;
@@ -0,0 +1,7 @@
1
+ import type { AIDataExtractionResponse } from '../../types';
2
+ /**
3
+ * Parse XML response from LLM and convert to AIDataExtractionResponse
4
+ */
5
+ export declare function parseXMLExtractionResponse<T>(xmlString: string): AIDataExtractionResponse<T>;
6
+ export declare function systemPromptToExtract(): string;
7
+ export declare const extractDataQueryPrompt: (pageDescription: string, dataQuery: string | Record<string, string>) => string;
@@ -0,0 +1,3 @@
1
+ import type { TModelFamily } from '@godscene/shared/env';
2
+ export declare function systemPromptToLocateElement(modelFamily: TModelFamily | undefined): string;
3
+ export declare const findElementPrompt: (targetElementDescription: string) => string;
@@ -0,0 +1,10 @@
1
+ import type { DeviceAction } from '../../types';
2
+ import type { TModelFamily } from '@godscene/shared/env';
3
+ export declare const descriptionForAction: (action: DeviceAction<any>, locatorSchemaTypeDescription: string, includeBbox?: boolean) => string;
4
+ export declare function systemPromptToTaskPlanning({ actionSpace, modelFamily, includeBbox, includeThought, includeSubGoals, }: {
5
+ actionSpace: DeviceAction<any>[];
6
+ modelFamily: TModelFamily | undefined;
7
+ includeBbox: boolean;
8
+ includeThought?: boolean;
9
+ includeSubGoals?: boolean;
10
+ }): Promise<string>;
@@ -0,0 +1,3 @@
1
+ import type { TModelFamily } from '@godscene/shared/env';
2
+ export declare function systemPromptToLocateSection(modelFamily: TModelFamily | undefined): string;
3
+ export declare const sectionLocatorInstruction: (sectionDescription: string) => string;
@@ -0,0 +1,2 @@
1
+ export declare function systemPromptToJudgeOrderSensitive(): string;
2
+ export declare const orderSensitiveJudgePrompt: (description: string) => string;
@@ -0,0 +1,26 @@
1
+ import type { StreamingAIResponse, StreamingCodeGenerationOptions } from '../../types';
2
+ import type { IModelConfig } from '@godscene/shared/env';
3
+ import { type ChromeRecordedEvent, type EventCounts, type EventSummary, type InputDescription, type ProcessedEvent, createEventCounts, createMessageContent, extractInputDescriptions, filterEventsByType, getScreenshotsForLLM, prepareEventSummary, processEventsForLLM, validateEvents } from './yaml-generator';
4
+ export interface PlaywrightGenerationOptions {
5
+ testName?: string;
6
+ includeScreenshots?: boolean;
7
+ includeTimestamps?: boolean;
8
+ maxScreenshots?: number;
9
+ description?: string;
10
+ viewportSize?: {
11
+ width: number;
12
+ height: number;
13
+ };
14
+ waitForNetworkIdle?: boolean;
15
+ waitForNetworkIdleTimeout?: number;
16
+ }
17
+ export type { ChromeRecordedEvent, EventCounts, InputDescription, ProcessedEvent, EventSummary, };
18
+ export { getScreenshotsForLLM, filterEventsByType, createEventCounts, extractInputDescriptions, processEventsForLLM, prepareEventSummary, createMessageContent, validateEvents, };
19
+ /**
20
+ * Generates Playwright test code from recorded events
21
+ */
22
+ export declare const generatePlaywrightTest: (events: ChromeRecordedEvent[], options: PlaywrightGenerationOptions, modelConfig: IModelConfig) => Promise<string>;
23
+ /**
24
+ * Generates Playwright test code from recorded events with streaming support
25
+ */
26
+ export declare const generatePlaywrightTestStream: (events: ChromeRecordedEvent[], options: PlaywrightGenerationOptions & StreamingCodeGenerationOptions, modelConfig: IModelConfig) => Promise<StreamingAIResponse>;
@@ -0,0 +1,2 @@
1
+ export declare function getUiTarsPlanningPrompt(): string;
2
+ export declare const getSummary: (prediction: string) => string;
@@ -0,0 +1,33 @@
1
+ import type { SubGoal } from '../../types';
2
+ /**
3
+ * Extract content from an XML tag in a string, searching from the end.
4
+ * This approach handles cases where models prepend thinking content (like <think>...</think>)
5
+ * before the actual response tags, or when there are incomplete/nested tags.
6
+ *
7
+ * Strategy: Find the LAST closing tag, then search backwards for the nearest opening tag.
8
+ * This ensures we get the last complete tag pair, even if there are incomplete tags before it.
9
+ *
10
+ * @param xmlString - The XML string to parse
11
+ * @param tagName - The name of the tag to extract (case-insensitive)
12
+ * @returns The trimmed content of the tag, or undefined if not found
13
+ */
14
+ export declare function extractXMLTag(xmlString: string, tagName: string): string | undefined;
15
+ /**
16
+ * Parse sub-goals from XML content
17
+ * Handles both formats:
18
+ * - <sub-goal index="1" status="pending">description</sub-goal>
19
+ * - <sub-goal index="1" status="finished" />
20
+ */
21
+ export declare function parseSubGoalsFromXML(xmlContent: string): SubGoal[];
22
+ /**
23
+ * Extract indexes of sub-goals marked as finished from <mark-sub-goal-done> content
24
+ */
25
+ export declare function parseMarkFinishedIndexes(xmlContent: string): number[];
26
+ export declare const distanceThreshold = 16;
27
+ export declare function distance(point1: {
28
+ x: number;
29
+ y: number;
30
+ }, point2: {
31
+ x: number;
32
+ y: number;
33
+ }): number;
@@ -0,0 +1,102 @@
1
+ import type { StreamingAIResponse, StreamingCodeGenerationOptions } from '../../types';
2
+ import type { IModelConfig } from '@godscene/shared/env';
3
+ export interface EventCounts {
4
+ navigation: number;
5
+ click: number;
6
+ input: number;
7
+ scroll: number;
8
+ total: number;
9
+ }
10
+ export interface InputDescription {
11
+ description: string;
12
+ value: string;
13
+ }
14
+ export interface ProcessedEvent {
15
+ type: string;
16
+ timestamp: number;
17
+ url?: string;
18
+ title?: string;
19
+ elementDescription?: string;
20
+ value?: string;
21
+ pageInfo?: any;
22
+ elementRect?: any;
23
+ }
24
+ export interface EventSummary {
25
+ testName: string;
26
+ startUrl: string;
27
+ eventCounts: EventCounts;
28
+ urls: string[];
29
+ clickDescriptions: string[];
30
+ inputDescriptions: InputDescription[];
31
+ events: ProcessedEvent[];
32
+ }
33
+ export interface ChromeRecordedEvent {
34
+ type: string;
35
+ timestamp: number;
36
+ url?: string;
37
+ title?: string;
38
+ elementDescription?: string;
39
+ value?: string;
40
+ pageInfo?: any;
41
+ elementRect?: any;
42
+ screenshotBefore?: string;
43
+ screenshotAfter?: string;
44
+ screenshotWithBox?: string;
45
+ }
46
+ export interface YamlGenerationOptions {
47
+ testName?: string;
48
+ includeTimestamps?: boolean;
49
+ maxScreenshots?: number;
50
+ description?: string;
51
+ /** Language for human-readable YAML content (e.g. 'English', 'Chinese'). Keys and API names are kept as-is. */
52
+ language?: string;
53
+ }
54
+ export interface FilteredEvents {
55
+ navigationEvents: ChromeRecordedEvent[];
56
+ clickEvents: ChromeRecordedEvent[];
57
+ inputEvents: ChromeRecordedEvent[];
58
+ scrollEvents: ChromeRecordedEvent[];
59
+ }
60
+ /**
61
+ * Get screenshots from events for LLM context
62
+ */
63
+ export declare const getScreenshotsForLLM: (events: ChromeRecordedEvent[], maxScreenshots?: number) => string[];
64
+ /**
65
+ * Filter events by type for easier processing
66
+ */
67
+ export declare const filterEventsByType: (events: ChromeRecordedEvent[]) => FilteredEvents;
68
+ /**
69
+ * Create event counts summary
70
+ */
71
+ export declare const createEventCounts: (filteredEvents: FilteredEvents, totalEvents: number) => EventCounts;
72
+ /**
73
+ * Extract input descriptions from input events
74
+ */
75
+ export declare const extractInputDescriptions: (inputEvents: ChromeRecordedEvent[]) => InputDescription[];
76
+ /**
77
+ * Process events for LLM consumption
78
+ */
79
+ export declare const processEventsForLLM: (events: ChromeRecordedEvent[]) => ProcessedEvent[];
80
+ /**
81
+ * Prepare comprehensive event summary for LLM
82
+ */
83
+ export declare const prepareEventSummary: (events: ChromeRecordedEvent[], options?: {
84
+ testName?: string;
85
+ maxScreenshots?: number;
86
+ }) => EventSummary;
87
+ /**
88
+ * Create message content for LLM with optional screenshots
89
+ */
90
+ export declare const createMessageContent: (promptText: string, screenshots?: string[], includeScreenshots?: boolean) => any[];
91
+ /**
92
+ * Validate events before processing
93
+ */
94
+ export declare const validateEvents: (events: ChromeRecordedEvent[]) => void;
95
+ /**
96
+ * Generates YAML test configuration from recorded events using AI
97
+ */
98
+ export declare const generateYamlTest: (events: ChromeRecordedEvent[], options: YamlGenerationOptions, modelConfig: IModelConfig) => Promise<string>;
99
+ /**
100
+ * Generates YAML test configuration from recorded events using AI with streaming support
101
+ */
102
+ export declare const generateYamlTestStream: (events: ChromeRecordedEvent[], options: YamlGenerationOptions & StreamingCodeGenerationOptions, modelConfig: IModelConfig) => Promise<StreamingAIResponse>;
@@ -0,0 +1,42 @@
1
+ import type { AIUsageInfo, DeepThinkOption, StreamingCallback } from '../../types';
2
+ import type { IModelConfig } from '@godscene/shared/env';
3
+ import type { ChatCompletionMessageParam } from 'openai/resources/index';
4
+ type CodexReasoningEffort = 'low' | 'medium' | 'high' | 'xhigh';
5
+ type CodexTextInput = {
6
+ type: 'text';
7
+ text: string;
8
+ text_elements: any[];
9
+ };
10
+ type CodexImageInput = {
11
+ type: 'image';
12
+ url: string;
13
+ };
14
+ type CodexLocalImageInput = {
15
+ type: 'localImage';
16
+ path: string;
17
+ };
18
+ type CodexTurnInput = CodexTextInput | CodexImageInput | CodexLocalImageInput;
19
+ type CodexTurnResult = {
20
+ content: string;
21
+ reasoning_content?: string;
22
+ usage?: AIUsageInfo;
23
+ isStreamed: boolean;
24
+ };
25
+ export declare const isCodexAppServerProvider: (baseURL?: string) => boolean;
26
+ export declare const normalizeCodexLocalImagePath: (imageUrl: string, platform?: NodeJS.Platform) => string;
27
+ export declare const resolveCodexReasoningEffort: ({ deepThink, modelConfig, }: {
28
+ deepThink?: DeepThinkOption;
29
+ modelConfig: IModelConfig;
30
+ }) => CodexReasoningEffort | undefined;
31
+ export declare const buildCodexTurnPayloadFromMessages: (messages: ChatCompletionMessageParam[]) => {
32
+ developerInstructions?: string;
33
+ input: CodexTurnInput[];
34
+ };
35
+ export declare function callAIWithCodexAppServer(messages: ChatCompletionMessageParam[], modelConfig: IModelConfig, options?: {
36
+ stream?: boolean;
37
+ onChunk?: StreamingCallback;
38
+ deepThink?: DeepThinkOption;
39
+ abortSignal?: AbortSignal;
40
+ }): Promise<CodexTurnResult>;
41
+ export declare function __shutdownCodexAppServerForTests(): Promise<void>;
42
+ export {};
@@ -0,0 +1,2 @@
1
+ import type { IModelConfig } from '@godscene/shared/env';
2
+ export declare function shouldForceOriginalImageDetail(modelConfig: Pick<IModelConfig, 'intent' | 'modelFamily'>): boolean;
@@ -0,0 +1,60 @@
1
+ import type { AIUsageInfo, DeepThinkOption } from '../../types';
2
+ import type { StreamingCallback } from '../../types';
3
+ export declare class AIResponseParseError extends Error { // Error subclass that carries the raw model response text and optional usage info alongside the message.
4
+ usage?: AIUsageInfo; // Optional; mirrors the optional third constructor argument.
5
+ rawResponse: string; // Always present; mirrors the required second constructor argument.
6
+ constructor(message: string, rawResponse: string, usage?: AIUsageInfo); // Argument order is message, rawResponse, then optional usage.
7
+ }
8
+ import { type IModelConfig, type TModelFamily } from '@godscene/shared/env';
9
+ import type { ChatCompletionMessageParam } from 'openai/resources/index';
10
+ import type { AIArgs } from '../../common';
11
+ export declare function yhtCallAI(messages: ChatCompletionMessageParam[], modelConfig: IModelConfig, options?: { // Async AI call whose declared parameters and result match callAI exactly. NOTE(review): how the two functions differ at runtime is not visible from this declaration.
12
+ stream?: boolean; // Presumably requests incremental delivery - confirm against the implementation.
13
+ onChunk?: StreamingCallback; // Presumably invoked per streamed chunk - confirm against the implementation.
14
+ deepThink?: DeepThinkOption;
15
+ abortSignal?: AbortSignal; // Caller-supplied cancellation signal.
16
+ }): Promise<{
17
+ content: string; // Always present.
18
+ reasoning_content?: string; // Optional.
19
+ usage?: AIUsageInfo; // Optional.
20
+ isStreamed: boolean; // Presumably reports whether the response was streamed - confirm against the implementation.
21
+ }>;
22
+ export declare function callAI(messages: ChatCompletionMessageParam[], modelConfig: IModelConfig, options?: { // Async AI call taking chat messages, a model config and an optional options object (every option field is optional too).
23
+ stream?: boolean; // Presumably requests incremental delivery - confirm against the implementation.
24
+ onChunk?: StreamingCallback; // Presumably invoked per streamed chunk - confirm against the implementation.
25
+ deepThink?: DeepThinkOption;
26
+ abortSignal?: AbortSignal; // Caller-supplied cancellation signal.
27
+ }): Promise<{
28
+ content: string; // Always present, as a raw string.
29
+ reasoning_content?: string; // Optional.
30
+ usage?: AIUsageInfo; // Optional.
31
+ isStreamed: boolean; // Presumably reports whether the response was streamed - confirm against the implementation.
32
+ }>;
33
+ export declare function callAIWithObjectResponse<T>(messages: ChatCompletionMessageParam[], modelConfig: IModelConfig, options?: { // Async AI call whose result content is typed as the caller-chosen T. No streaming options are accepted here.
34
+ deepThink?: DeepThinkOption;
35
+ abortSignal?: AbortSignal; // Caller-supplied cancellation signal.
36
+ }): Promise<{
37
+ content: T; // NOTE(review): T is chosen by the caller; this declaration shows no runtime validation against T.
38
+ contentString: string; // Presumably the raw text that content was parsed from - confirm against the implementation.
39
+ usage?: AIUsageInfo; // Optional.
40
+ reasoning_content?: string; // Optional.
41
+ }>;
42
+ export declare function callAIWithStringResponse(msgs: AIArgs, modelConfig: IModelConfig, options?: { // Async AI call that takes its messages as AIArgs and accepts only an abort signal as an option.
43
+ abortSignal?: AbortSignal; // Caller-supplied cancellation signal.
44
+ }): Promise<{
45
+ content: string; // Always present.
46
+ usage?: AIUsageInfo; // Optional.
47
+ }>;
48
+ export declare function extractJSONFromCodeBlock(response: string): string; // Synchronous string-to-string transform. Presumably pulls the JSON text out of a fenced code block in a model response; behaviour when no block is found is not visible here - confirm against the implementation.
49
+ export declare function preprocessDoubaoBboxJson(input: string): string; // Synchronous string-to-string transform. Presumably repairs bounding-box JSON emitted by Doubao models before it is parsed - confirm against the implementation.
50
+ export declare function resolveReasoningConfig({ reasoningEnabled, reasoningEffort, reasoningBudget, modelFamily, }: { // Synchronously turns reasoning settings into a config record plus optional messages. The argument object is required but every field in it is optional.
51
+ reasoningEnabled?: boolean;
52
+ reasoningEffort?: string; // Typed as a free-form string here, not a fixed union.
53
+ reasoningBudget?: number; // NOTE(review): unit (presumably tokens) is not visible from this declaration.
54
+ modelFamily?: TModelFamily;
55
+ }): {
56
+ config: Record<string, unknown>; // Always present. Presumably merged into the model request - confirm against the implementation.
57
+ debugMessage?: string; // Optional.
58
+ warningMessage?: string; // Optional.
59
+ };
60
+ export declare function safeParseJson(input: string, modelFamily: TModelFamily | undefined): any; // Parses a string into an untyped value, so callers must narrow the result themselves. modelFamily must always be passed, though it may be undefined. NOTE(review): whether failures throw or return a fallback is not visible from this declaration.
@@ -0,0 +1,32 @@
1
+ import type { IModelConfig } from '@godscene/shared/env';
2
+ /**
3
+ * Default hard timeout (ms) applied to every AI HTTP call.
4
+ *
5
+ * We need an end-to-end timeout for the whole request lifecycle, not just the
6
+ * time until response headers arrive. Some providers can return headers
7
+ * quickly and then stall while the body is still being read.
8
+ *
9
+ * Override per intent via `MIDSCENE_MODEL_TIMEOUT`,
10
+ * `MIDSCENE_INSIGHT_MODEL_TIMEOUT`, or `MIDSCENE_PLANNING_MODEL_TIMEOUT`.
11
+ * Set the env var (or `modelConfig.timeout`) to `0` to disable the hard
12
+ * timeout entirely; only a caller-provided `abortSignal` will cancel the
13
+ * request in that case.
14
+ */
15
+ export declare const DEFAULT_AI_CALL_TIMEOUT_MS = 180000; // 180000 ms = 180 s = 3 minutes.
16
+ /** Identifying code set on the AbortError raised by our hard timeout. */
17
+ export declare const AI_CALL_HARD_TIMEOUT_CODE = "AI_CALL_HARD_TIMEOUT"; // Declared as a string literal type. Presumably the value isHardTimeoutError looks for - confirm against the implementation.
18
+ /**
19
+ * Resolve the hard request timeout for an AI call.
20
+ * Returns `null` when the user explicitly opted out (`timeout === 0`).
21
+ */
22
+ export declare function resolveEffectiveTimeoutMs(modelConfig: Pick<IModelConfig, 'timeout'>): number | null; // Only the timeout field of the model config is required. The number | null result matches the timeoutMs parameter of buildRequestAbortSignal.
23
+ /**
24
+ * True if the error was raised by our hard-timeout AbortSignal (vs any other
25
+ * abort/network/HTTP error). Used to drive observability without having to
26
+ * string-match the message.
27
+ */
28
+ export declare function isHardTimeoutError(err: unknown): boolean; // Accepts any caught value (unknown), so it can be called directly on a catch variable. Declared as a plain boolean, not a type predicate, so it does not narrow err.
29
+ export declare function buildRequestAbortSignal(timeoutMs: number | null, userSignal?: AbortSignal): { // Builds the AbortSignal for one request from a timeout (null is accepted, matching the result type of resolveEffectiveTimeoutMs) and an optional caller signal.
30
+ signal: AbortSignal; // Presumably aborts on hard timeout or when userSignal aborts - confirm against the implementation.
31
+ cleanup: () => void; // Presumably releases the timer and listeners; call once the request settles - confirm against the implementation.
32
+ };
@@ -0,0 +1,72 @@
1
+ import type { PlanningAIResponse, UIContext } from '../types';
2
+ import { type IModelConfig } from '@godscene/shared/env';
3
+ import type { ConversationHistory } from './conversation-history';
4
+ type ActionType = 'click' | 'left_double' | 'right_single' | 'drag' | 'type' | 'hotkey' | 'finished' | 'scroll' | 'wait'; // Non-exported union of the nine action tags; each tag has exactly one matching interface in the exported Action union.
5
+ export declare function uiTarsPlanning(userInstruction: string, options: { // Async planning call: takes the user instruction plus a required options object and resolves to a PlanningAIResponse.
6
+ conversationHistory: ConversationHistory; // Required. NOTE(review): whether this object is mutated by the call is not visible from this declaration.
7
+ context: UIContext; // Required.
8
+ modelConfig: IModelConfig; // Required.
9
+ actionContext?: string; // Optional.
10
+ abortSignal?: AbortSignal; // Optional caller-supplied cancellation signal.
11
+ }): Promise<PlanningAIResponse>;
12
+ interface BaseAction { // Common shape extended by every concrete action interface in this file.
13
+ action_type: ActionType; // Narrowed to a single literal by each subinterface.
14
+ action_inputs: Record<string, any>; // Loose record here; each subinterface narrows it to specific fields.
15
+ reflection: string | null; // Required key, but the value may be null.
16
+ thought: string | null; // Required key, but the value may be null.
17
+ }
18
+ interface ClickAction extends BaseAction { // Action variant tagged click.
19
+ action_type: 'click';
20
+ action_inputs: {
21
+ start_box: string; // Box is carried as a string. NOTE(review): its coordinate format is not visible from this declaration.
22
+ };
23
+ }
24
+ interface DragAction extends BaseAction { // Action variant tagged drag; the only variant with two boxes.
25
+ action_type: 'drag';
26
+ action_inputs: {
27
+ start_box: string; // Carried as a string. NOTE(review): coordinate format is not visible from this declaration.
28
+ end_box: string; // Carried as a string, same caveat as start_box.
29
+ };
30
+ }
31
+ interface WaitAction extends BaseAction { // Action variant tagged wait.
32
+ action_type: 'wait';
33
+ action_inputs: {
34
+ time: string; // Typed as a string, not a number. NOTE(review): unit and format are not visible from this declaration.
35
+ };
36
+ }
37
+ interface LeftDoubleAction extends BaseAction { // Action variant tagged left_double; inputs have the same shape as ClickAction.
38
+ action_type: 'left_double';
39
+ action_inputs: {
40
+ start_box: string; // Carried as a string. NOTE(review): coordinate format is not visible from this declaration.
41
+ };
42
+ }
43
+ interface RightSingleAction extends BaseAction { // Action variant tagged right_single; inputs have the same shape as ClickAction.
44
+ action_type: 'right_single';
45
+ action_inputs: {
46
+ start_box: string; // Carried as a string. NOTE(review): coordinate format is not visible from this declaration.
47
+ };
48
+ }
49
+ interface TypeAction extends BaseAction { // Action variant tagged type.
50
+ action_type: 'type';
51
+ action_inputs: {
52
+ content: string; // Presumably the text to be typed - confirm against the implementation.
53
+ };
54
+ }
55
+ interface HotkeyAction extends BaseAction { // Action variant tagged hotkey.
56
+ action_type: 'hotkey';
57
+ action_inputs: {
58
+ key: string; // Single string field. NOTE(review): how key combinations are encoded is not visible from this declaration.
59
+ };
60
+ }
61
+ interface ScrollAction extends BaseAction { // Action variant tagged scroll.
62
+ action_type: 'scroll';
63
+ action_inputs: {
64
+ direction: 'up' | 'down'; // Only vertical directions are representable in this type; no distance field is declared.
65
+ };
66
+ }
67
+ interface FinishedAction extends BaseAction { // Action variant tagged finished.
68
+ action_type: 'finished';
69
+ action_inputs: Record<string, never>; // Typed so that no input property can hold a value, i.e. an empty object.
70
+ }
71
+ export type Action = ClickAction | LeftDoubleAction | RightSingleAction | DragAction | TypeAction | HotkeyAction | ScrollAction | FinishedAction | WaitAction; // Discriminated union of the nine action interfaces; narrow on action_type to get the matching action_inputs shape.
72
+ export {};