computer-use-ootb-internal 0.0.108__py3-none-any.whl → 0.0.109__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (16) hide show
  1. computer_use_ootb_internal/app_teachmode.py +8 -14
  2. computer_use_ootb_internal/app_teachmode_gradio.py +1 -1
  3. computer_use_ootb_internal/computer_use_demo/animation/click_animation.py +1 -1
  4. computer_use_ootb_internal/computer_use_demo/executor/teachmode_executor.py +70 -72
  5. computer_use_ootb_internal/computer_use_demo/tools/base.py +4 -2
  6. computer_use_ootb_internal/computer_use_demo/tools/computer.py +55 -43
  7. computer_use_ootb_internal/run_teachmode_ootb_args.py +23 -11
  8. {computer_use_ootb_internal-0.0.108.dist-info → computer_use_ootb_internal-0.0.109.dist-info}/METADATA +1 -1
  9. {computer_use_ootb_internal-0.0.108.dist-info → computer_use_ootb_internal-0.0.109.dist-info}/RECORD +11 -16
  10. computer_use_ootb_internal/computer_use_demo/gui_agent/gui_parser/simple_parser/gui_parser.py +0 -676
  11. computer_use_ootb_internal/computer_use_demo/gui_agent/gui_parser/simple_parser/icon_detection/icon_detection.py +0 -253
  12. computer_use_ootb_internal/computer_use_demo/gui_agent/gui_parser/simple_parser/panel_recognition/llm_panel_recognize.py +0 -170
  13. computer_use_ootb_internal/computer_use_demo/gui_agent/gui_parser/simple_parser/test_capture.py +0 -8
  14. computer_use_ootb_internal/computer_use_demo/gui_agent/gui_parser/simple_parser/uia_parser.py +0 -0
  15. {computer_use_ootb_internal-0.0.108.dist-info → computer_use_ootb_internal-0.0.109.dist-info}/WHEEL +0 -0
  16. {computer_use_ootb_internal-0.0.108.dist-info → computer_use_ootb_internal-0.0.109.dist-info}/entry_points.txt +0 -0
@@ -1,676 +0,0 @@
1
- import cv2
2
- import numpy as np
3
- import datetime
4
- import requests
5
- import importlib
6
- # import textdistance
7
- import glob
8
- from PIL import Image
9
- import ctypes
10
-
11
- from .text_detection import text_detection
12
- from .icon_detection.icon_detection import detect_icons, IconDetector
13
- from .utils import *
14
-
15
- import os
16
-
17
- from importlib import resources
18
- import pathlib
19
- import computer_use_ootb_internal
20
-
21
class GUIParser():
    """Turn a screenshot (plus optional UIA metadata) into a panel/element tree.

    The parser is callable; see ``__call__`` for the supported modes
    (``"teach"``, ``"uia"``, ``"icon_ocr"`` and an OCR-only fallback).

    Parsed results are dictionaries mapping a window/software name to a list
    of panel dicts.  Each panel carries at least ``name``, ``rectangle``
    (``[left, top, right, bottom]``) and ``elements``.
    """

    name = "gui_parser"
    description = """
    This tool can extract the information of screenshot.
    Invoke command: gui_parser(query, visual[i])
    :param query -> str, specific command. visual[i] -> image, the latest screenshot.
    """

    # Elements whose vertical positions differ by at most this many pixels
    # are grouped into the same visual row.
    _ROW_TOLERANCE_PX = 15

    # Substrings of a window name that mark it as a web browser.
    _BROWSER_NAMES = ("Edge", "Chrome", "Firefox", "Opera", "Safari")

    # UIA friendly class names that are treated as panels (element containers).
    _PANEL_TYPES = (
        "Dialog", "Pane", "GroupBox", "TitleBar", "Menu",
        "Document", "ListBox", "AppBar", "Tab", "Group",
    )

    # Localised panel captions mapped to their English names.
    _PANEL_NAME_TRANSLATIONS = {
        "新建会话属性": "New session properties",
        "会话管理器": "Session manager",
        "任务栏": "Taskbar",
    }

    def __init__(self, cache_folder=".cache/", user_id=None, trace_id=None, scaleFactor="1.0x"):
        """Create the cache folders and resolve the OOTB database location.

        :param cache_folder: directory for cached artefacts; an ``ocr``
            sub-directory is created inside it.
        :param user_id: user identifier, used to locate teach-mode icons.
        :param trace_id: trace identifier, used to locate teach-mode icons.
        :param scaleFactor: display scale as a string such as ``"1.0x"``.
        """
        # Use pathlib for platform-independent path handling.
        # parents=True so a nested cache folder does not fail on first use.
        cache_path = pathlib.Path(cache_folder)
        cache_path.mkdir(parents=True, exist_ok=True)
        (cache_path / "ocr").mkdir(exist_ok=True)

        super().__init__()
        self.cache_folder = str(cache_path)
        print(f"Cache folder: {self.cache_folder}")

        self.task_id = get_current_time()
        self.parsers = {}
        self.temperature = 0
        self.gui_parser = None
        self.icon_template_threshold = 0.7
        # Defined up front so get_panel_uia() works in every mode, including
        # direct calls that never go through __call__(mode="uia").
        self.exclude_class_name_list = []
        self.parsed_gui = {}
        # Stays None when the lookup below fails, so the attribute always exists.
        self.ootb_database_path = None
        try:
            ootb_path = os.getenv("OOTB_PATH")
            if ootb_path:
                self.ootb_database_path = pathlib.Path(ootb_path) / "ootbdatabase"
            else:
                # Fallback to package resources if env var not set
                with resources.path(__package__, "ootbdatabase") as db_path:
                    self.ootb_database_path = db_path
        except Exception as e:
            # Best effort: report and continue with no database path.
            print(f"Error: {e}. OOTB_PATH is not set correctly.")
        self.user_id = user_id
        self.trace_id = trace_id
        self.scaleFactor = scaleFactor
        self.width = None
        self.height = None

    def __call__(self, uia_data=None, screenshot_path=None, query=None, mode="uia", ocr_mode="googleocr"):
        """Parse the screenshot according to ``mode`` and return the GUI tree.

        :param uia_data: mapping of software name to UIA control trees
            (consumed by the ``"teach"`` and ``"uia"`` modes).
        :param screenshot_path: path of the screenshot image; must exist.
        :param query: in teach mode, a list whose image-path entries name the
            icon templates to look for.
        :param mode: ``"teach"``, ``"uia"``, ``"icon_ocr"``; anything else
            selects the OCR-only path.
        :param ocr_mode: accepted for interface compatibility; not read here.
        :raises FileNotFoundError: if the screenshot does not exist.
        :return: the parsed GUI dictionary (also stored on ``self.parsed_gui``).
        """
        screenshot_path = pathlib.Path(screenshot_path)
        if not screenshot_path.exists():
            raise FileNotFoundError(f"Screenshot not found: {screenshot_path}")

        # Only the size is needed; close the file handle right away.
        with Image.open(screenshot_path) as image:
            self.width, self.height = image.size

        if mode == "teach":
            icon_items = self._detect_teach_icons(screenshot_path, query)

            # OCR text results (same detector the OCR branches below use).
            ocr = text_detection(screenshot_path)
            ocr_elements = []
            if ocr and "texts" in ocr:
                ocr_elements = [
                    {
                        "name": text_item["content"],
                        "rectangle": text_item["bbox"],
                        "is_text": True,
                    }
                    for text_item in ocr["texts"]
                ]

            # Icons first, then text, merged into one full-screen panel.
            all_elements = list(icon_items) + ocr_elements
            ocr_icon_parsed_gui = {
                "Screen": [{
                    "name": "Main Content",
                    "rectangle": [0, 0, self.width, self.height],
                    "elements": self.merge_elements({"combined_elements": all_elements}),
                }]
            }

            # get uia parsed gui
            uia_parsed_gui = self.get_panel_uia(uia_data)
            self.postprocess_uia(uia_parsed_gui)

            self.parsed_gui = self.merge_uia_and_teach_results(uia_parsed_gui, ocr_icon_parsed_gui)
            return self.parsed_gui

        if mode == "uia":
            # UIA-only mode: nothing is excluded by class name.
            self.exclude_class_name_list = []
            self.parsed_gui = self.get_panel_uia(uia_data)
            self.postprocess_uia(self.parsed_gui)
            return self.parsed_gui

        if mode == "icon_ocr":
            # OCR + full icon detection mode
            ocr = text_detection(screenshot_path)
            icon_template_folder = os.path.join(self.ootb_database_path, "ootbdatabase/teach_mode")
            icon_detector = IconDetector(icon_template_folder)
            icon_items = [
                icon_detector(
                    screenshot_path=screenshot_path,
                    mode="full_detect",
                    threshold=0.6,
                    scale_factor=self.scaleFactor,
                )
            ]
            # NOTE(review): postprocess_icon() does not use icon_items, so the
            # detected icons never reach the result - confirm intent.
            self.parsed_gui = self.postprocess_icon("screen", icon_items, screenshot_path)
        else:
            # OCR only mode
            ocr = text_detection(screenshot_path)
            self.parsed_gui = self.postprocess_icon("screen", [], screenshot_path)

        self._append_ocr_rows(ocr, screenshot_path)
        return self.parsed_gui

    def _detect_teach_icons(self, screenshot_path, query):
        """Return the icons detected for the image names in ``query`` (or ``[]``)."""
        if not (query and isinstance(query, list) and len(query) > 1):
            return []
        specific_icon_names = [item for item in query if self.is_image_path(item)]
        if not specific_icon_names:
            return []

        icon_template_folder = self.ootb_database_path / self.user_id / self.trace_id / "icons"
        icon_template_folder.mkdir(parents=True, exist_ok=True)
        print(f"icon_template_folder: {icon_template_folder}")

        icon_detector = IconDetector(str(icon_template_folder))
        detected_items = icon_detector(
            screenshot_path=screenshot_path,
            mode="teach",
            threshold=self.icon_template_threshold,  # TODO: setting on init
            scale_factor=self.scaleFactor,
            specific_icon_names=specific_icon_names,
        )
        if not detected_items:
            return []
        print("detected_icons:", detected_items)
        return detected_items

    def _append_ocr_rows(self, ocr, screenshot_path):
        """Append the OCR text rows of each ``"screen"`` panel to its elements."""
        for panel_item in self.parsed_gui["screen"]:
            text_rows = self.get_text(panel_item, ocr, screenshot_path)
            merged_rows = self.merge_elements({"editing_control": text_rows})
            # postprocess_icon() initialises "elements" to None.
            panel_item["elements"] = (panel_item.get("elements") or []) + merged_rows

    def if_browser(self, window_name):
        """Return True if ``window_name`` mentions a known web browser."""
        return any(browser in window_name for browser in self._BROWSER_NAMES)

    def if_adobe(self, window_name):
        """Return True if ``window_name`` mentions a known Adobe application."""
        adobe_names = ("Adobe Premiere Pro", "Adobe After Effect", "Adobe Acrobat")
        return any(adobe in window_name for adobe in adobe_names)

    def if_office(self, window_name):
        """Return True if ``window_name`` mentions a known Office application."""
        office_names = ("Excel", "Word", "PowerPoint", "Outlook")
        return any(office in window_name for office in office_names)

    @staticmethod
    def is_image_path(text):
        """Return True if ``text`` is a string ending in a typical image extension.

        Non-string values are reported as not being image paths.
        """
        image_extensions = (".jpg", ".jpeg", ".png", ".gif", ".bmp", ".tiff", ".tif")
        return isinstance(text, str) and text.endswith(image_extensions)

    @staticmethod
    def get_text(panel_item, ocr, screenshot_path):
        """Group the OCR texts lying inside a panel into left-to-right rows.

        :param panel_item: panel dict with a ``"rectangle"`` entry.
        :param ocr: OCR result with a ``"texts"`` list of
            ``{"content", "bbox"}`` items; ``None`` or a missing key yields
            no text.
        :param screenshot_path: unused; kept for interface compatibility.
        :return: list of rows, each a list of ``{"name", "rectangle"}`` dicts
            sorted by their left edge.
        """
        panel_rect = panel_item["rectangle"]
        texts = (ocr or {}).get("texts") or []

        # Step 1: keep only texts fully contained in the panel rectangle.
        panel_texts = [
            item for item in texts
            if item["bbox"][0] >= panel_rect[0]
            and item["bbox"][1] >= panel_rect[1]
            and item["bbox"][2] <= panel_rect[2]
            and item["bbox"][3] <= panel_rect[3]
        ]
        if not panel_texts:
            return []

        sorted_panel_texts = sorted(panel_texts, key=lambda x: (x["bbox"][1], x["bbox"][0]))

        # Step 2: start a new row whenever the top edge jumps by more than
        # the row tolerance relative to the previous text.
        editing_controls = []
        current_row = []
        previous_y = sorted_panel_texts[0]["bbox"][1]
        for item in sorted_panel_texts:
            y1 = item["bbox"][1]
            if abs(y1 - previous_y) > GUIParser._ROW_TOLERANCE_PX:
                if current_row:
                    editing_controls.append(current_row)
                current_row = []
            current_row.append({"name": item["content"], "rectangle": item["bbox"]})
            previous_y = y1
        if current_row:
            editing_controls.append(current_row)

        # Step 3: order each row by horizontal position.
        return [sorted(row, key=lambda x: x["rectangle"][0]) for row in editing_controls]

    def postprocess_icon(self, window_name, icon_items, screenshot_path):
        """Build a single full-screen "Main Content" panel for ``window_name``.

        ``icon_items`` is accepted but not used; ``"elements"`` is
        initialised to ``None``.
        """
        with Image.open(screenshot_path) as image:
            width, height = image.size
        return {
            window_name: [
                {
                    "name": "Main Content",
                    "rectangle": [0, 0, width, height],
                    "elements": None,
                }
            ]
        }

    @staticmethod
    def merge_elements(panel_item):
        """Merge all element lists of ``panel_item`` into rows sorted by position.

        :param panel_item: dict whose list values hold element dicts (each
            with a ``"rectangle"``).  A list value may also contain rows
            (lists of element dicts), e.g. the output of ``get_text``; those
            are flattened first.
        :return: list of rows; a row groups elements whose vertical centres
            differ by at most the row tolerance from their predecessor, and
            is sorted by left edge.  Empty input yields ``[]``.
        """
        if not panel_item or not any(panel_item.values()):
            return []

        all_elements = []
        for value in panel_item.values():
            if not isinstance(value, list):
                continue
            for entry in value:
                if isinstance(entry, list):
                    all_elements.extend(entry)
                else:
                    all_elements.append(entry)
        if not all_elements:
            return []

        def y_center(element):
            rect = element["rectangle"]
            return (rect[1] + rect[3]) / 2

        def left_edge(element):
            return element["rectangle"][0]

        merged_control = []
        current_row = []
        previous_y = None
        for element in sorted(all_elements, key=y_center):
            center = y_center(element)
            if previous_y is not None and abs(center - previous_y) > GUIParser._ROW_TOLERANCE_PX:
                if current_row:
                    current_row.sort(key=left_edge)
                    merged_control.append(current_row)
                current_row = []
            current_row.append(element)
            previous_y = center

        # Flush the last row.
        if current_row:
            current_row.sort(key=left_edge)
            merged_control.append(current_row)

        # Order rows by the vertical centre of their left-most element.
        merged_control.sort(key=lambda row: y_center(row[0]))
        return merged_control

    def postprocess_uia(self, metadata):
        """Fix TitleBar rectangles and sort each panel's elements, in place.

        :param metadata: mapping of window name to list of panel dicts.
        :return: the same ``metadata`` object.
        """
        for window_data in metadata.values():
            for element_data in window_data:
                elements = element_data.get("elements", [])

                # A TitleBar panel gets the bounding box of its elements.
                if element_data["class_name"] == "TitleBar" and len(elements) > 0:
                    rects = [e.get("rectangle", [0, 0, 0, 0]) for e in elements]
                    element_data["rectangle"] = [
                        min(r[0] for r in rects),
                        min(r[1] for r in rects),
                        max(r[2] for r in rects),
                        max(r[3] for r in rects),
                    ]
                # sort elements by x and y
                element_data["elements"] = sort_elements_by_xy(elements)

        return metadata

    @staticmethod
    def _overlap_area(rect1, rect2):
        """Return the intersection area of two ``[l, t, r, b]`` rectangles."""
        x1 = max(rect1[0], rect2[0])
        y1 = max(rect1[1], rect2[1])
        x2 = min(rect1[2], rect2[2])
        y2 = min(rect1[3], rect2[3])
        if x1 >= x2 or y1 >= y2:
            return 0
        return (x2 - x1) * (y2 - y1)

    @staticmethod
    def _is_significant_overlap(ocr_rect, uia_rect):
        """Return True if more than half of ``ocr_rect`` lies inside ``uia_rect``."""
        ocr_area = (ocr_rect[2] - ocr_rect[0]) * (ocr_rect[3] - ocr_rect[1])
        if ocr_area <= 0:
            return False
        return GUIParser._overlap_area(ocr_rect, uia_rect) > 0.5 * ocr_area

    def _overlaps_any_uia(self, element, uia_result):
        """Return True if ``element`` significantly overlaps any UIA element."""
        return any(
            self._is_significant_overlap(element["rectangle"], uia_element["rectangle"])
            for panels in uia_result.values()
            for panel in panels
            for elements_list in panel.get("elements", [])
            for uia_element in elements_list
        )

    def merge_uia_and_teach_results(self, uia_result, teach_result):
        """Add teach-mode elements to the UIA result under an ``"OCR"`` window.

        Text elements (``is_text`` truthy) that significantly overlap an
        existing UIA element are dropped as duplicates; every other teach
        element (including icons, which carry no ``is_text`` key) is kept.

        :param uia_result: UIA parse result; not modified at the top level
            (a shallow copy receives the new key).
        :param teach_result: teach parse result with a ``"Screen"`` panel list
            whose ``"elements"`` are rows of element dicts.
        :return: shallow copy of ``uia_result`` plus an ``"OCR"`` entry.
        """
        merged_result = uia_result.copy()

        all_ocr_elements = []
        for screen in teach_result.get("Screen", []):
            for elements_row in screen.get("elements") or []:
                for element in elements_row:
                    if element.get("is_text") and self._overlaps_any_uia(element, uia_result):
                        continue
                    all_ocr_elements.append(element)

        merged_result["OCR"] = [{
            "name": "Main Content",
            "rectangle": [0, 0, self.width, self.height],
            "elements": [all_ocr_elements],
        }]
        return merged_result

    @staticmethod
    def _control_display_name(texts):
        """Return the display name encoded by a UIA ``texts`` property.

        Empty input gives ``""``.  When the first entry is a list, all string
        fragments are concatenated; otherwise the first entry is returned.
        """
        if not texts:
            return ""
        if not isinstance(texts[0], list):
            return texts[0]
        parts = []
        for item in texts:
            if isinstance(item, list) and item and isinstance(item[0], str):
                parts.append("".join(item))
            elif isinstance(item, str):
                parts.append(item)
        return "".join(parts)

    @staticmethod
    def _default_panel_name(friendly_class_name, software_name):
        """Return a fallback name for an unnamed panel, or ``None`` if there is none."""
        if friendly_class_name == "TitleBar":
            return "Title Bar"
        if friendly_class_name == "Document":
            return "Main Content"
        if friendly_class_name == "Pane":
            if software_name == "Taskbar":
                return "Taskbar"
            if "Edge" in software_name or "Chrome" in software_name or "Firefox" in software_name:
                return "Browser Content"
            return "Navigation Bar" if software_name in ["web", "web video"] else "Main Content"
        return None

    def _collect_controls(self, control_info, dialog_components, depth, software_name, is_browser, exclude):
        """Recursively append the usable descendants of ``control_info`` to a panel.

        All descendants land in the flat ``dialog_components["elements"]``
        list; ``dialog_components["count"]`` supplies the running index used
        in each element's ``depth`` label.
        """
        for child_control in control_info.get("children", []) or []:
            child_properties = child_control.get("properties", {})
            child_class = child_properties.get("friendly_class_name", "")

            # Excluded control types are skipped together with their subtree.
            if child_class in exclude:
                continue

            child_name = self._control_display_name(child_properties.get("texts", []))
            # Unnamed edit/combo boxes get a generic label.
            if child_class in ["Edit", "ComboBox"] and not child_name:
                child_name = "Search Bar"

            rectangle = child_properties.get("rectangle", [0, 0, 0, 0])
            automation_id = child_properties.get("automation_id", "")

            # The last alternative is always true here because excluded
            # classes were skipped above; kept to mirror the filter's intent.
            keep_by_type = (
                (software_name == "Taskbar" and child_class in ["Button", "Static", "Image"])
                or (is_browser and child_class in ["Pane", "Button", "Edit", "Tab", "TabItem", "Document", "Group"])
                or not child_control.get("children", [])
                or automation_id
                or child_class not in exclude
            )
            has_valid_rect = len(rectangle) == 4 and not all(value == 0 for value in rectangle)
            # Controls with an automation_id may have an empty name.
            has_identity = child_name not in ["", '"'] or automation_id

            if keep_by_type and has_valid_rect and has_identity:
                left, top, right, bottom = rectangle
                dialog_rect = dialog_components.get("rectangle", [0, 0, 0, 0])
                if len(dialog_rect) != 4:
                    dialog_rect = [0, 0, 0, 0]
                left_bound, top_bound, right_bound, bottom_bound = dialog_rect

                # Taskbar and browser controls keep their raw coordinates;
                # everything else is clipped to the owning panel.
                if software_name not in ["Taskbar"] and not is_browser:
                    clipped = [
                        max(left, left_bound),
                        max(top, top_bound),
                        min(right, right_bound),
                        min(bottom, bottom_bound),
                    ]
                else:
                    clipped = [left, top, right, bottom]
                # The input properties are updated in place, as before.
                child_properties["rectangle"] = clipped

                # Only keep rectangles with a positive width and height.
                if clipped[0] < clipped[2] and clipped[1] < clipped[3]:
                    element_info = {
                        "name": child_name.replace("\u200b", ""),
                        "rectangle": clipped,
                        "class_name": child_class,
                        "type": ["Click", "rightClick"],
                        "depth": f"{depth}-{dialog_components.get('count', 1)}",
                    }
                    if automation_id:
                        element_info["automation_id"] = automation_id

                    dialog_components.setdefault("elements", []).append(element_info)
                    dialog_components["count"] = dialog_components.get("count", 1) + 1

            self._collect_controls(child_control, dialog_components, depth, software_name, is_browser, exclude)

    def get_panel_uia(self, control_info_list):
        """Convert raw UIA control trees into panels with flat element lists.

        :param control_info_list: mapping of software name to a list of
            top-level control dicts (``"properties"`` / ``"children"``);
            ``None`` is treated as empty.
        :return: mapping of software name to list of panel dicts.  Software
            entries with no controls are omitted.
        """
        # Falls back to "exclude nothing" if the attribute was never set.
        exclude = getattr(self, "exclude_class_name_list", None) or []
        dialog_components = {}

        for software_name, controls in (control_info_list or {}).items():
            if not controls:
                continue

            is_browser = any(browser in software_name for browser in self._BROWSER_NAMES)
            panels = dialog_components[software_name] = []

            for control_info in controls:
                control_properties = control_info.get("properties", {})
                friendly_class_name = control_properties.get("friendly_class_name", "")

                # Only container-like controls that have children become panels.
                if friendly_class_name not in self._PANEL_TYPES or not control_info.get("children"):
                    continue

                control_name = self._control_display_name(control_properties.get("texts", [""]))
                automation_id = control_properties.get("automation_id", "")

                # Unnamed panels fall back to automation_id, then to a
                # class-based default when one exists.
                if not control_name:
                    fallback = automation_id or self._default_panel_name(friendly_class_name, software_name)
                    if fallback:
                        control_name = fallback

                control_name = self._PANEL_NAME_TRANSLATIONS.get(control_name, control_name)

                panel_info = {
                    "name": control_name,
                    "rectangle": control_properties.get("rectangle", [0, 0, 0, 0]),
                    "class_name": friendly_class_name,
                    "depth": "1",
                    "elements": [],
                    "count": 1,
                }
                if automation_id:
                    panel_info["automation_id"] = automation_id

                panels.append(panel_info)
                self._collect_controls(control_info, panel_info, "1", software_name, is_browser, exclude)

        return dialog_components
589
-
590
- import matplotlib.pyplot as plt
591
- import cv2
592
- import matplotlib.font_manager as fm
593
-
594
def show_img_with_box_points(img, box, ax, color="red"):
    """Draw the outline of ``box`` on the matplotlib axes ``ax``.

    :param img: unused; kept so existing callers keep working.
    :param box: rectangle as ``[left, top, right, bottom]``.
    :param ax: axes that receive the rectangle patch.
    :param color: edge colour of the outline.
    """
    origin = (box[0], box[1])
    width = box[2] - box[0]
    height = box[3] - box[1]
    # Fully transparent face so only the thin outline is visible.
    outline = plt.Rectangle(
        origin, width, height, edgecolor=color, facecolor=(0, 0, 0, 0), lw=0.4
    )
    ax.add_patch(outline)
598
-
599
def extract_second_underscore_content(filename):
    """Return the second ``_``-separated field of ``filename``.

    The field is only extracted when ``filename`` contains an ``@`` and at
    least one underscore; otherwise ``filename`` is returned unchanged.
    """
    pieces = filename.split('_')
    if "@" not in filename or len(pieces) <= 1:
        return filename
    return pieces[1]
604
-
605
def show_guiParserd(gui, img_path):
    """Display a screenshot with the parsed panels and elements drawn on it.

    Panels are outlined in green and labelled with their name; elements are
    outlined in blue (or green when flagged ``current_step_icon``) and
    labelled in red.

    :param gui: parsed GUI mapping of window name to list of panel dicts,
        each with ``"rectangle"``, ``"name"`` and ``"elements"`` (rows of
        element dicts, or ``None``).
    :param img_path: path of the screenshot to draw on.
    :raises FileNotFoundError: if the image cannot be read.
    """
    img = cv2.imread(img_path)
    # cv2.imread signals failure by returning None instead of raising.
    if img is None:
        raise FileNotFoundError(f"Could not read image: {img_path}")
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    plt.figure(dpi=500)
    plt.axis('off')
    plt.imshow(img)
    ax = plt.gca()

    # Draw the panels and collect every element they contain.
    item_cache = []
    for windows_name, panel_list in gui.items():
        print("windows_name: ", windows_name)
        print("panel_list: ", panel_list)
        for sequence in panel_list:
            show_img_with_box_points(img, sequence["rectangle"], ax, color="green")
            if len(sequence["name"]) < 100:
                plt.text(sequence["rectangle"][0], sequence["rectangle"][1], sequence["name"],
                         fontsize=4, color='green')
            for items in sequence.get('elements') or []:
                item_cache.extend(items)

    # Draw each collected element exactly once.
    for item in item_cache:
        try:
            if "rectangle" not in item:
                continue
            # Very long names are skipped to keep the figure readable.
            if len(item["name"]) > 200:
                continue
            box = item["rectangle"]
            if "current_step_icon" in item.keys() and item["current_step_icon"]:
                show_img_with_box_points(img, box, ax, color="green")
                plt.text(box[0], box[3] + 25, "Current Step Icon", fontsize=3, color='green')
            else:
                show_img_with_box_points(img, box, ax, color="blue")
                plt.text(box[0], box[1], extract_second_underscore_content(item["name"]),
                         fontsize=2, color='red')
        except Exception as e:
            # Best effort: a malformed element must not abort the preview.
            print(e)
    plt.show()
644
-
645
-
646
-
647
def get_screen_info():
    """Return the display scale factor as a string such as ``"1.5x"``.

    The value is read through ``ctypes.windll.shcore``; if that lookup fails
    for any reason the error is printed and ``"1.0x"`` is returned.
    """
    fallback = "1.0x"
    try:
        percent = ctypes.windll.shcore.GetScaleFactorForDevice(0)
        return str(percent / 100) + "x"
    except Exception as e:
        print(e)
        return fallback
655
-
656
def parse_gui(user_id, trace_id, screenshot_path, user_scaleFactor="auto", uia_data=None, query=None, mode="uia", ocr_mode="googleocr"):
    """Build a ``GUIParser`` and run it once on the given screenshot.

    :param user_scaleFactor: scale string such as ``"1.0x"``, or ``"auto"``
        to query it via ``get_screen_info()``.
    :return: whatever the parser call returns for the selected ``mode``.
    """
    # TODO: check mult-screens
    scale = get_screen_info() if user_scaleFactor == "auto" else user_scaleFactor
    print("user_scaleFactor: ", scale)
    parser = GUIParser(user_id=user_id, trace_id=trace_id, scaleFactor=scale)
    return parser(
        uia_data=uia_data,
        screenshot_path=screenshot_path,
        query=query,
        mode=mode,
        ocr_mode=ocr_mode,
    )
663
-
664
- import json
665
- import sys
666
if __name__ == "__main__":
    # Manual smoke test: convert a saved UIA dump into the parsed panel
    # structure and write it next to the input.

    # Developer-local sample screenshot; not read by the steps below.
    screenshot_path = r'D:\develop\computer_use_ootb_internal-main\.cache\20241222_041910\screenshot-0.png'
    # Context managers make sure both files are closed.
    with open("test_uia_data.json", "r", encoding="utf-8") as f:
        meta_data = json.load(f)
    gui_parser = GUIParser(user_id="test", trace_id="test", scaleFactor="1.0x")
    uia_gui = gui_parser.get_panel_uia(meta_data)
    uia_gui = gui_parser.postprocess_uia(uia_gui)
    with open("test_uia_gui.json", "w") as f:
        json.dump(uia_gui, f, indent=4)
675
-
676
-