autoglm-gui 1.4.0__py3-none-any.whl → 1.5.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (120) hide show
  1. AutoGLM_GUI/__init__.py +11 -0
  2. AutoGLM_GUI/__main__.py +26 -8
  3. AutoGLM_GUI/actions/__init__.py +6 -0
  4. AutoGLM_GUI/actions/handler.py +196 -0
  5. AutoGLM_GUI/actions/types.py +15 -0
  6. AutoGLM_GUI/adb/__init__.py +53 -0
  7. AutoGLM_GUI/adb/apps.py +227 -0
  8. AutoGLM_GUI/adb/connection.py +323 -0
  9. AutoGLM_GUI/adb/device.py +171 -0
  10. AutoGLM_GUI/adb/input.py +67 -0
  11. AutoGLM_GUI/adb/screenshot.py +11 -0
  12. AutoGLM_GUI/adb/timing.py +167 -0
  13. AutoGLM_GUI/adb_plus/keyboard_installer.py +4 -2
  14. AutoGLM_GUI/adb_plus/qr_pair.py +8 -8
  15. AutoGLM_GUI/adb_plus/screenshot.py +22 -1
  16. AutoGLM_GUI/adb_plus/serial.py +38 -20
  17. AutoGLM_GUI/adb_plus/touch.py +4 -9
  18. AutoGLM_GUI/agents/__init__.py +51 -0
  19. AutoGLM_GUI/agents/events.py +19 -0
  20. AutoGLM_GUI/agents/factory.py +153 -0
  21. AutoGLM_GUI/agents/glm/__init__.py +7 -0
  22. AutoGLM_GUI/agents/glm/agent.py +292 -0
  23. AutoGLM_GUI/agents/glm/message_builder.py +81 -0
  24. AutoGLM_GUI/agents/glm/parser.py +110 -0
  25. AutoGLM_GUI/agents/glm/prompts_en.py +77 -0
  26. AutoGLM_GUI/agents/glm/prompts_zh.py +75 -0
  27. AutoGLM_GUI/agents/mai/__init__.py +28 -0
  28. AutoGLM_GUI/agents/mai/agent.py +405 -0
  29. AutoGLM_GUI/agents/mai/parser.py +254 -0
  30. AutoGLM_GUI/agents/mai/prompts.py +103 -0
  31. AutoGLM_GUI/agents/mai/traj_memory.py +91 -0
  32. AutoGLM_GUI/agents/protocols.py +27 -0
  33. AutoGLM_GUI/agents/stream_runner.py +188 -0
  34. AutoGLM_GUI/api/__init__.py +71 -11
  35. AutoGLM_GUI/api/agents.py +190 -229
  36. AutoGLM_GUI/api/control.py +9 -6
  37. AutoGLM_GUI/api/devices.py +112 -28
  38. AutoGLM_GUI/api/health.py +13 -0
  39. AutoGLM_GUI/api/history.py +78 -0
  40. AutoGLM_GUI/api/layered_agent.py +306 -181
  41. AutoGLM_GUI/api/mcp.py +11 -10
  42. AutoGLM_GUI/api/media.py +64 -1
  43. AutoGLM_GUI/api/scheduled_tasks.py +98 -0
  44. AutoGLM_GUI/api/version.py +23 -10
  45. AutoGLM_GUI/api/workflows.py +2 -1
  46. AutoGLM_GUI/config.py +72 -14
  47. AutoGLM_GUI/config_manager.py +98 -27
  48. AutoGLM_GUI/device_adapter.py +263 -0
  49. AutoGLM_GUI/device_manager.py +248 -29
  50. AutoGLM_GUI/device_protocol.py +266 -0
  51. AutoGLM_GUI/devices/__init__.py +49 -0
  52. AutoGLM_GUI/devices/adb_device.py +200 -0
  53. AutoGLM_GUI/devices/mock_device.py +185 -0
  54. AutoGLM_GUI/devices/remote_device.py +177 -0
  55. AutoGLM_GUI/exceptions.py +3 -3
  56. AutoGLM_GUI/history_manager.py +164 -0
  57. AutoGLM_GUI/i18n.py +81 -0
  58. AutoGLM_GUI/metrics.py +13 -20
  59. AutoGLM_GUI/model/__init__.py +5 -0
  60. AutoGLM_GUI/model/message_builder.py +69 -0
  61. AutoGLM_GUI/model/types.py +24 -0
  62. AutoGLM_GUI/models/__init__.py +10 -0
  63. AutoGLM_GUI/models/history.py +96 -0
  64. AutoGLM_GUI/models/scheduled_task.py +71 -0
  65. AutoGLM_GUI/parsers/__init__.py +22 -0
  66. AutoGLM_GUI/parsers/base.py +50 -0
  67. AutoGLM_GUI/parsers/phone_parser.py +58 -0
  68. AutoGLM_GUI/phone_agent_manager.py +118 -367
  69. AutoGLM_GUI/platform_utils.py +31 -2
  70. AutoGLM_GUI/prompt_config.py +15 -0
  71. AutoGLM_GUI/prompts/__init__.py +32 -0
  72. AutoGLM_GUI/scheduler_manager.py +304 -0
  73. AutoGLM_GUI/schemas.py +272 -63
  74. AutoGLM_GUI/scrcpy_stream.py +159 -37
  75. AutoGLM_GUI/server.py +3 -1
  76. AutoGLM_GUI/socketio_server.py +114 -29
  77. AutoGLM_GUI/state.py +10 -30
  78. AutoGLM_GUI/static/assets/{about-DeclntHg.js → about-BQm96DAl.js} +1 -1
  79. AutoGLM_GUI/static/assets/alert-dialog-B42XxGPR.js +1 -0
  80. AutoGLM_GUI/static/assets/chat-C0L2gQYG.js +129 -0
  81. AutoGLM_GUI/static/assets/circle-alert-D4rSJh37.js +1 -0
  82. AutoGLM_GUI/static/assets/dialog-DZ78cEcj.js +45 -0
  83. AutoGLM_GUI/static/assets/history-DFBv7TGc.js +1 -0
  84. AutoGLM_GUI/static/assets/index-Bzyv2yQ2.css +1 -0
  85. AutoGLM_GUI/static/assets/{index-zQ4KKDHt.js → index-CmZSnDqc.js} +1 -1
  86. AutoGLM_GUI/static/assets/index-CssG-3TH.js +11 -0
  87. AutoGLM_GUI/static/assets/label-BCUzE_nm.js +1 -0
  88. AutoGLM_GUI/static/assets/logs-eoFxn5of.js +1 -0
  89. AutoGLM_GUI/static/assets/popover-DLsuV5Sx.js +1 -0
  90. AutoGLM_GUI/static/assets/scheduled-tasks-MyqGJvy_.js +1 -0
  91. AutoGLM_GUI/static/assets/square-pen-zGWYrdfj.js +1 -0
  92. AutoGLM_GUI/static/assets/textarea-BX6y7uM5.js +1 -0
  93. AutoGLM_GUI/static/assets/workflows-CYFs6ssC.js +1 -0
  94. AutoGLM_GUI/static/index.html +2 -2
  95. AutoGLM_GUI/types.py +142 -0
  96. {autoglm_gui-1.4.0.dist-info → autoglm_gui-1.5.0.dist-info}/METADATA +178 -92
  97. autoglm_gui-1.5.0.dist-info/RECORD +157 -0
  98. mai_agent/base.py +137 -0
  99. mai_agent/mai_grounding_agent.py +263 -0
  100. mai_agent/mai_naivigation_agent.py +526 -0
  101. mai_agent/prompt.py +148 -0
  102. mai_agent/unified_memory.py +67 -0
  103. mai_agent/utils.py +73 -0
  104. AutoGLM_GUI/api/dual_model.py +0 -311
  105. AutoGLM_GUI/dual_model/__init__.py +0 -53
  106. AutoGLM_GUI/dual_model/decision_model.py +0 -664
  107. AutoGLM_GUI/dual_model/dual_agent.py +0 -917
  108. AutoGLM_GUI/dual_model/protocols.py +0 -354
  109. AutoGLM_GUI/dual_model/vision_model.py +0 -442
  110. AutoGLM_GUI/mai_ui_adapter/agent_wrapper.py +0 -291
  111. AutoGLM_GUI/phone_agent_patches.py +0 -146
  112. AutoGLM_GUI/static/assets/chat-Iut2yhSw.js +0 -125
  113. AutoGLM_GUI/static/assets/dialog-BfdcBs1x.js +0 -45
  114. AutoGLM_GUI/static/assets/index-5hCCwHA7.css +0 -1
  115. AutoGLM_GUI/static/assets/index-DHF1NZh0.js +0 -12
  116. AutoGLM_GUI/static/assets/workflows-xiplap-r.js +0 -1
  117. autoglm_gui-1.4.0.dist-info/RECORD +0 -100
  118. {autoglm_gui-1.4.0.dist-info → autoglm_gui-1.5.0.dist-info}/WHEEL +0 -0
  119. {autoglm_gui-1.4.0.dist-info → autoglm_gui-1.5.0.dist-info}/entry_points.txt +0 -0
  120. {autoglm_gui-1.4.0.dist-info → autoglm_gui-1.5.0.dist-info}/licenses/LICENSE +0 -0
@@ -0,0 +1,157 @@
1
+ AutoGLM_GUI/__init__.py,sha256=XHKVEKeLm9LLoM5Otd_9Sk1ald7GxGIvIvjQlCpQTCc,2387
2
+ AutoGLM_GUI/__main__.py,sha256=liPZEFLCuhshCCodW_ldn0A63-i_gXy29TLIuDhuWvQ,6920
3
+ AutoGLM_GUI/config.py,sha256=OrjhbjcaZWBRLiLTzmknm6pONglDXjfoZngQd8jU9bk,2286
4
+ AutoGLM_GUI/config_manager.py,sha256=WZEa8p2iXIg1CneuquKh-EemdQL1uiPoz_I7yWPY99A,24312
5
+ AutoGLM_GUI/device_adapter.py,sha256=eA0bOlU-lzJig9cNCZrS0FqApEVbSOg7oke5P1iJ1rA,9011
6
+ AutoGLM_GUI/device_manager.py,sha256=QvxTjXvnVz_BhJAIfNColsXkw6YUQkf1yiC1An8u7kU,35293
7
+ AutoGLM_GUI/device_protocol.py,sha256=T3JctpggGekkYDeNXiPj_PoINPg8yFqalTx8Lk9w-9A,6944
8
+ AutoGLM_GUI/exceptions.py,sha256=QXTQzxdbEHDUrtaD1G4zuKZydFJO5ymNvuo8ZvVjUdA,3487
9
+ AutoGLM_GUI/history_manager.py,sha256=wbndRk2fnuY5n2bcsfbt2HEa_1N9AcnSC1xAn1VMgcA,6347
10
+ AutoGLM_GUI/i18n.py,sha256=-x8yD5ywyK7xwjoI-pOYqPlSeId_W9KkVjmKayr36m0,2501
11
+ AutoGLM_GUI/logger.py,sha256=wLzjbRqsHAsOtI9yB0bcUZFVzgGK6qM8330std6FjVw,2553
12
+ AutoGLM_GUI/metrics.py,sha256=L72hzMNzw1eemQLz6Hoth5sRW6OQlI4Np5BIFSFwl1c,8994
13
+ AutoGLM_GUI/phone_agent_manager.py,sha256=Bcz--lMTjLFzB2wX8teOm7CAyjcB7h_1E2eVkh3qogQ,19798
14
+ AutoGLM_GUI/platform_utils.py,sha256=tsZ-H6Lsgz3aceWQ3GVBmO4lgKBEyD5l-fYyXU7nPU4,3356
15
+ AutoGLM_GUI/prompt_config.py,sha256=3eNEDdlrk8rvuqakQTiVrGEmFFH5X-UdV-9yUibCSsI,336
16
+ AutoGLM_GUI/prompts.py,sha256=lC5UktW0KRpWKij6T0GG1OrNKV9gAfM6dUcp_tjX7mo,4814
17
+ AutoGLM_GUI/scheduler_manager.py,sha256=6EFo1o4QdD00V2-uMKIclQo_QYY8Ktwy_a3q22IUWds,10296
18
+ AutoGLM_GUI/schemas.py,sha256=JvEQlRZIzY1kZuahyMAKfqTVd10XtTkCzcvwrV81LxI,21274
19
+ AutoGLM_GUI/scrcpy_protocol.py,sha256=h6AtAGq3zMM30FWM7EYiYKJz7BDikpNCOjcKtFcRx3s,997
20
+ AutoGLM_GUI/scrcpy_stream.py,sha256=cI30cOnDeAgaWZhHdkbAvQvuWovA3FVC8IatyrSspeE,21376
21
+ AutoGLM_GUI/server.py,sha256=Fo2Afl6CFT5B6MBggvR0n_savNxqxW22bFdN51NXRJs,299
22
+ AutoGLM_GUI/socketio_server.py,sha256=Y_B3lKXCv9rDs8IbbKUCGoA2eldZ1pr6_qK1Gh99XOQ,6532
23
+ AutoGLM_GUI/state.py,sha256=qGnqZKdWFDpegVBLB4KHDbzfAVWc89C0AiCum_R9LSk,912
24
+ AutoGLM_GUI/types.py,sha256=FY-M0VWK0CyiLhV1gL1aqY872zdftlkHiJPLEORk4ug,2927
25
+ AutoGLM_GUI/version.py,sha256=z0MPXu-k9UO7RXxVGcAwim2M3yklrRqLwgOq66zSdzo,178
26
+ AutoGLM_GUI/workflow_manager.py,sha256=AZ_zrhSeEvtlF1HTNMNS3f4qBwRuX-bzImdZEVZVXV8,5552
27
+ AutoGLM_GUI/actions/__init__.py,sha256=kdprfh9IzhoaBBlKG10q6MROSQ96n-uo9GkREjcv9gc,165
28
+ AutoGLM_GUI/actions/handler.py,sha256=wztZwIDRIAhokxJ3LvDQY8i2AS9MdmTn7Q3h3e34Elo,6993
29
+ AutoGLM_GUI/actions/types.py,sha256=19TZNoi6KkFtsGkaqhw23vIE7c549j1t2CpKxBXuUKo,266
30
+ AutoGLM_GUI/adb/__init__.py,sha256=QdiryYhjt-Ox8MKGCUv6Dd9L1e0VFAcXS4TMpDNCi9s,1095
31
+ AutoGLM_GUI/adb/apps.py,sha256=D9HcsGzqwJL3g1ZRS4_Iq4WjviijDDEtkBJlV4-TSjE,8774
32
+ AutoGLM_GUI/adb/connection.py,sha256=4_u3tZ21y8SvDrucSTe1tLDk4MSIMRV-vRl6uS3njFk,9212
33
+ AutoGLM_GUI/adb/device.py,sha256=erVAjyefzsd1Dz5qB2nrr7zJwwPNFOrLiX_Se9r7Kjk,4351
34
+ AutoGLM_GUI/adb/input.py,sha256=DELZOebzg9Dh7OxOICe-cPIPbe6kIye2pUY24fIHTVM,1738
35
+ AutoGLM_GUI/adb/screenshot.py,sha256=VJWbZM3wNCc0kfdm6ra7ja0fcS9yw5KvfezzaMH4mfk,434
36
+ AutoGLM_GUI/adb/timing.py,sha256=3NPcuMzNQuo_eqs3SKQESsKeRa5L4Ice5GTyrx0WqLc,5790
37
+ AutoGLM_GUI/adb_plus/__init__.py,sha256=YwYYbeMrsKv8BGPcD6dv0PB8-wW93Ik9pMHo02i8QoU,945
38
+ AutoGLM_GUI/adb_plus/device.py,sha256=nCMDUd18LPfl5CsJeDjKd9HSt6FFOy2jS1muOSeLO08,1683
39
+ AutoGLM_GUI/adb_plus/ip.py,sha256=dt6ffIjiW_jGoEqUKUDi9fRGbWN1W9LxfT8rWIqgS-0,2200
40
+ AutoGLM_GUI/adb_plus/keyboard_installer.py,sha256=OfBdtFNbIZtiEgsJJaewgCAucaP4ppoAmd1M6y9-URY,13586
41
+ AutoGLM_GUI/adb_plus/mdns.py,sha256=AwQ4MQsPKVW1tPjIUpOWvBQGlJxvVrO9eTvxoqsgPUo,5862
42
+ AutoGLM_GUI/adb_plus/pair.py,sha256=3UElG0yjylhmZBtPpDER2BNMZs9CwDeavjuQOwOC4sw,1945
43
+ AutoGLM_GUI/adb_plus/qr_pair.py,sha256=WHvx76q2xfTnJPXf5y_qI9cNuiBC_KeWwqYpuROdPkw,13571
44
+ AutoGLM_GUI/adb_plus/screenshot.py,sha256=12JFZJsrUsW4hTSFCQMEDFtMA6WWROaa8bcXxVvi1iw,4023
45
+ AutoGLM_GUI/adb_plus/serial.py,sha256=DZEm5R-w9UypMpqPbnE1uRvQXsAx9Jr-4vQk07_X7eM,3850
46
+ AutoGLM_GUI/adb_plus/touch.py,sha256=cqGmGBmo9gUjSI31OBEV4Q_hLKhRo48ed4XNmbYwgSs,2135
47
+ AutoGLM_GUI/adb_plus/version.py,sha256=u5u1N40izJceeORN9kFhyz0wq_hZtlwBXaiytfA3px8,2428
48
+ AutoGLM_GUI/agents/__init__.py,sha256=z4AggrPOw8xa9S0e053UvOFIES5GTe052HZcjnYCmLU,1264
49
+ AutoGLM_GUI/agents/events.py,sha256=4810WrCNYqQFTpkIcU0lv3XUYwzR4uGn3MeshhDrFaI,392
50
+ AutoGLM_GUI/agents/factory.py,sha256=PKIcssxwGVjpCVqC03jS64vZUUarSDs1_9GB1AZTESE,4620
51
+ AutoGLM_GUI/agents/protocols.py,sha256=YkAprwChnLsomDRDyrPmzpI3uOAASHU-KHBpTxNryHs,572
52
+ AutoGLM_GUI/agents/stream_runner.py,sha256=SEtU-MTvRlVh_QImM3Tmwx1-P0aPHRsbz4eINiG-GuA,6569
53
+ AutoGLM_GUI/agents/glm/__init__.py,sha256=1b9SzDLmWXcxlPS708p96o4W1Ks40YWQ3dd2HPaYewI,179
54
+ AutoGLM_GUI/agents/glm/agent.py,sha256=Um5dqwGllzXP4f2YSqNtSAYhD01sQYTOJW1w8sWGoKU,9817
55
+ AutoGLM_GUI/agents/glm/message_builder.py,sha256=Z1eNMifdrQfojoDBnUBaFRMnxxvugE8kTvFX2jiyHFk,2302
56
+ AutoGLM_GUI/agents/glm/parser.py,sha256=nhWKIAlEloghxdw_pPEKMbLFufYsaj2aIFsYi_ZEU3A,3425
57
+ AutoGLM_GUI/agents/glm/prompts_en.py,sha256=ms92sFAl45gZC1mVdKjYN4kntJG-OZujbCcI-jyzJGo,2608
58
+ AutoGLM_GUI/agents/glm/prompts_zh.py,sha256=g3_6nBjcLZwc5Keg1u26Z4w6xjoL05Fk078263A5EZ8,8285
59
+ AutoGLM_GUI/agents/mai/__init__.py,sha256=HOma4Vc0SDgk2uyS-bq6IvWCrpTBTC_9u6TJp1fOplU,935
60
+ AutoGLM_GUI/agents/mai/agent.py,sha256=NHSwYUq7-PF0Md8cWWFZp-E5hAf9mjFLAy8eob-IopA,14023
61
+ AutoGLM_GUI/agents/mai/parser.py,sha256=0VoxBiW3gEaU759tw1iD0qoOOFgUcaq-CyiWQJ1u0aM,8453
62
+ AutoGLM_GUI/agents/mai/prompts.py,sha256=X3dvVLAP01_OcpUYiwJxBNhETU3lbY-EUK0gJURtkec,4392
63
+ AutoGLM_GUI/agents/mai/traj_memory.py,sha256=2oEpwgz4YGT3KgW0BojFYEcsZL8WKlkaBJao0QcLHcY,2965
64
+ AutoGLM_GUI/api/__init__.py,sha256=EYENHE-DjShI0EOy65i3cmpw7CT4EilaQgG3rmPW0Qs,6321
65
+ AutoGLM_GUI/api/agents.py,sha256=RoyGxikL57oSj4-B3NMsTjdwcTGkANbrUaThrylf0YQ,18259
66
+ AutoGLM_GUI/api/control.py,sha256=mxklJ7o4Eb_nc-lZKKG86zMk2s5DHv3WGPatnyzQB44,3374
67
+ AutoGLM_GUI/api/devices.py,sha256=lRjbkrNHM9aoXGw4pu0GidDDedR6DB5uPxsTQZJmcCE,14022
68
+ AutoGLM_GUI/api/health.py,sha256=ZedFUv-IHst_YUyeQ_MpL5JXOKuOhWVlHBVwAY3MM0I,266
69
+ AutoGLM_GUI/api/history.py,sha256=fXX7Ao3xlO6pByJ6kEwqJZW6REB4RSKHvTG-801wwIM,2799
70
+ AutoGLM_GUI/api/layered_agent.py,sha256=GvwajyfzFbfuPWhFxbz9p8fR1dspAH4x3VN_h5gwE0A,28403
71
+ AutoGLM_GUI/api/mcp.py,sha256=ppqzBxiS_8tUeJ5P8AszH_4m6q0sNe0oT2SCU_uISks,4625
72
+ AutoGLM_GUI/api/media.py,sha256=Cwtk2cikZSBK8gV-igpdtx7bsGYyb2letOe4ZEj0-k4,3806
73
+ AutoGLM_GUI/api/metrics.py,sha256=2h5MnUiMTRAshD6nvcarLVoSlOjtlyVAGg-LRXq03vQ,941
74
+ AutoGLM_GUI/api/scheduled_tasks.py,sha256=BJvu2TlNfXw6C_y4lYYl-sxfcUQzyE6TYWf-jiVa2-Y,3622
75
+ AutoGLM_GUI/api/version.py,sha256=DYiSNNykJsxOXO1zvf3w5AcAtQJTs2c-qtA1HRVihx0,6085
76
+ AutoGLM_GUI/api/workflows.py,sha256=waYwlm2HxwkcfZwp7Bm2Z2l_bms6UmOy2xJEU9B_t0s,2431
77
+ AutoGLM_GUI/devices/__init__.py,sha256=3RHDxS3feJ1jGdWbDKeOHSFUogmexcEr9lpcXWhlnjg,1471
78
+ AutoGLM_GUI/devices/adb_device.py,sha256=uKgSMh7Dn7VPkfjkGltUqAgMcXsXBPIvG4dD3C__MOw,6334
79
+ AutoGLM_GUI/devices/mock_device.py,sha256=StMmVnQ5YRkc6MNUSLz5-FAdW1l2eL1XTRxCHNwE6Lc,5786
80
+ AutoGLM_GUI/devices/remote_device.py,sha256=PXc2uoczKcDi7ovcIZoWcDA8U9CrIXDamyPApGXNXYI,5665
81
+ AutoGLM_GUI/model/__init__.py,sha256=eBat20fq5PyTUL_uRW33eQQzj6m3JQkG5rVgmDOgh3c,120
82
+ AutoGLM_GUI/model/message_builder.py,sha256=wUX3iFiR8kj2lBEpI9gJXVISzsW1AgvRVeIG4DaHmEQ,2148
83
+ AutoGLM_GUI/model/types.py,sha256=Lk96Nmn9U69RaT9OZbUgop2HvZK0Dx4ticCqpGgTsM8,750
84
+ AutoGLM_GUI/models/__init__.py,sha256=NYsb1EcyrcoSYeRYC9u4wEPfTmtEKUScxjd_RNrvIaQ,252
85
+ AutoGLM_GUI/models/history.py,sha256=r1yDMHUIPsoXKDE7LXvnGVee0WejVhfBpG5x4tWwAJI,3210
86
+ AutoGLM_GUI/models/scheduled_task.py,sha256=Z4qO9W72nRyI--gDLqAkO1AMN30B_SDAqZUeF7mQ6o8,2536
87
+ AutoGLM_GUI/parsers/__init__.py,sha256=pa5GK6zVJH0vxofZkkMf4UJmoqAUo29cxI9ujkBeI0E,693
88
+ AutoGLM_GUI/parsers/base.py,sha256=3HGsMAhIv5oQU9GjLGPzMDtDmhScpFQKYK1kaRhNm8w,1565
89
+ AutoGLM_GUI/parsers/phone_parser.py,sha256=JEgfZfpolmGM_j9F1gNZC1nxBNWXlo4h1k_JIyNuLCE,2053
90
+ AutoGLM_GUI/prompts/__init__.py,sha256=GtS7Ek3ds5G8orJj4R-2odjm1Hchje3cF1-1YaxZD38,1069
91
+ AutoGLM_GUI/static/favicon.ico,sha256=uB4wrcENiFaF-K4V-FzNp51XLW8GC4-ujpBDHgISfGM,781
92
+ AutoGLM_GUI/static/index.html,sha256=iz04ysnZj5cpaWthldZy6O-sCmSA5KjPCH1APSTVYfY,761
93
+ AutoGLM_GUI/static/logo-192.png,sha256=FOXgZxFrN0ZleO0VjlCnvD5cDkd1okJPkwPDZqsflNs,35243
94
+ AutoGLM_GUI/static/logo-512.png,sha256=HlwSg09dQ3_Jtb09Ght4l2VK3YNLZlP_941LCdpDkWk,251598
95
+ AutoGLM_GUI/static/assets/about-BQm96DAl.js,sha256=xEmpVOF_8m5s1CAiFxJTDblpMslqZFJaVDjD89VAurQ,155
96
+ AutoGLM_GUI/static/assets/alert-dialog-B42XxGPR.js,sha256=sfHYc7uvNPOjcCC7jXDvlMJMmasYWz3Agy5b6RWFvlA,3140
97
+ AutoGLM_GUI/static/assets/chat-C0L2gQYG.js,sha256=egJ_j0rCPsOALZljsmqNKSivAigGPr7cdsuXkY0h7DY,302373
98
+ AutoGLM_GUI/static/assets/circle-alert-D4rSJh37.js,sha256=mpHSXyEAtaWQI8LILk4_a9TJpxlny_3cCilkaakoORw,251
99
+ AutoGLM_GUI/static/assets/dialog-DZ78cEcj.js,sha256=v2RmsIYgjBKTVNI2HpAesH17EqNo82XmlKRGUGf4BNc,25373
100
+ AutoGLM_GUI/static/assets/history-DFBv7TGc.js,sha256=1k4H9Ma51pdgKoYqaq-2A9b_MZU9iWakXfUvRltSbJ4,5884
101
+ AutoGLM_GUI/static/assets/index-Bzyv2yQ2.css,sha256=WG2Wb8arrb2tgjEDBBbOEcwbCkQ7aRhI8uvpMcAuMko,82586
102
+ AutoGLM_GUI/static/assets/index-CmZSnDqc.js,sha256=G5vfw7NEAYtlSADLga38py3lUGN58mgwQqW-AYyE04c,229
103
+ AutoGLM_GUI/static/assets/index-CssG-3TH.js,sha256=YvRGzMup5LSxIY7X9a6Vt7gjYKa9DQRLbjdDzNBh8Pg,396156
104
+ AutoGLM_GUI/static/assets/label-BCUzE_nm.js,sha256=_WZ19VEilw0jpb6sODsu372W2WO3eC2XJMb81-j-68g,1407
105
+ AutoGLM_GUI/static/assets/logo-Cyfm06Ym.png,sha256=6GRUFH2KNmB4ZmnrmJ1wxpoIYEmOW-__XlmKImyAMKM,4955675
106
+ AutoGLM_GUI/static/assets/logs-eoFxn5of.js,sha256=e4mapCJ_JmN55jlO54fmaSUBbu1g1O9kRbxe8PymUhk,5048
107
+ AutoGLM_GUI/static/assets/popover-DLsuV5Sx.js,sha256=I-845Z4i6hu0xAVZ5wilyEM9cQRfZ_l1lD0mXD_RAGU,5782
108
+ AutoGLM_GUI/static/assets/scheduled-tasks-MyqGJvy_.js,sha256=BmNZ_ZRs_U7to9U_WqqX6BEJhl_CPWDvdx_7Ko26MUo,8364
109
+ AutoGLM_GUI/static/assets/square-pen-zGWYrdfj.js,sha256=_OUY1FnCWYMwexPiWjOngv49-CiRjZHf6qouLRdCySE,321
110
+ AutoGLM_GUI/static/assets/textarea-BX6y7uM5.js,sha256=TDNf7gHPjEtPmhf7COizAwL4PsonsSJrkf362jsk6m8,705
111
+ AutoGLM_GUI/static/assets/worker-D6BRitjy.js,sha256=RL1NIRIks9dXdDhXOHK1cCg-OELT0uv9a6u_UEHfQ0Q,173494
112
+ AutoGLM_GUI/static/assets/workflows-CYFs6ssC.js,sha256=sJWl2I4Ej3VeHhwC--sg7Bv3WcmryGzPw1CVec6RaPA,3301
113
+ phone_agent/__init__.py,sha256=7sCmGiY-ePdb6L08MGG6DkOiu8goop6wq-v2SiM62tE,360
114
+ phone_agent/agent.py,sha256=1SgHpv70_XbujG1ElYRZbvRO5-d4l7gBgiFRBz_FIH8,8157
115
+ phone_agent/agent_ios.py,sha256=R7EBsoHVghEUBtI5TB7M0_btpFcb4NHquMNxktrqrJE,9338
116
+ phone_agent/device_factory.py,sha256=x_zsU2SNk4kh_OQafBoAfvH6nmMLCP-koqUUvcjNBhg,5027
117
+ phone_agent/actions/__init__.py,sha256=YiAMeBMVhRfmDOf0hQ_1FEBSV5ApcnHPQ05gHjA_weQ,160
118
+ phone_agent/actions/handler.py,sha256=QsRWS3RB_CQxdSmoNFNsmSe58j3__D0yDYB52mcmNN8,15640
119
+ phone_agent/actions/handler_ios.py,sha256=YmK-QabWqCTHBemNJFGnEqDZO5Zhu05KBEvuyBNYLB8,10223
120
+ phone_agent/adb/__init__.py,sha256=w3CBSGRv-cQPME-Q-Bx2tbL_WJkpb9RqmT2olNK9TZw,950
121
+ phone_agent/adb/connection.py,sha256=Epmghk079HcDCqOYARIv8u1kb_oit1Y8rxRhBlqr0z0,10344
122
+ phone_agent/adb/device.py,sha256=NRZAT9b8wT1wRJzV6176fMYxgmACkwyl0ASoEs3Id8g,6686
123
+ phone_agent/adb/input.py,sha256=Yr-5oXcYITkPl8humom7k5eakS3OMVPilj-N0veeCQQ,2857
124
+ phone_agent/adb/screenshot.py,sha256=FgPH6WgFBxU7JPgWpaCgQbltcm4LvhIoTmR5X5rm0yM,3161
125
+ phone_agent/config/__init__.py,sha256=mtVL2HYW5r5UrGejCsTDefXTnLvpfxrJouvfqc2JaS8,1322
126
+ phone_agent/config/apps.py,sha256=D9HcsGzqwJL3g1ZRS4_Iq4WjviijDDEtkBJlV4-TSjE,8774
127
+ phone_agent/config/apps_harmonyos.py,sha256=z2QN6O7NPe87ta4EAPs6s4PAC74SPPXlXXz6fTkAXhY,10606
128
+ phone_agent/config/apps_ios.py,sha256=CrOEDnPyG7T2qF_lS3SDlXjhbJPwDKI61_yOrggernc,11481
129
+ phone_agent/config/i18n.py,sha256=-x8yD5ywyK7xwjoI-pOYqPlSeId_W9KkVjmKayr36m0,2501
130
+ phone_agent/config/prompts.py,sha256=0XKS_LrXthPB_bAXUT3ezsyr7UlwJNR2i4FLdEuticQ,8444
131
+ phone_agent/config/prompts_en.py,sha256=t8C9YLailtg8kVlFtYvq3t9EdtMGE8HastTfDh5BAUM,2630
132
+ phone_agent/config/prompts_zh.py,sha256=oOd5wJz-_P9ZF3F8hIPPrwjXdgv0sIgH7aop94eOTKM,8603
133
+ phone_agent/config/timing.py,sha256=bT-5HKGovzY_95wxRGIhmHyrIIPqhWivsXg-0LWc2pM,5793
134
+ phone_agent/hdc/__init__.py,sha256=6Olk6ujXQovtzmRLXk5MIqbwffxleSQnoHYvCO4G1Ww,996
135
+ phone_agent/hdc/connection.py,sha256=TweQ417wU2M35mgddK_Xt92zn5tZ0K7vtsaQjLn_e1o,11326
136
+ phone_agent/hdc/device.py,sha256=-tOXttrO2oGNFWg5dIGDgEEwa_jfesFTzpgTQJqlYik,7598
137
+ phone_agent/hdc/input.py,sha256=28TcK1FK1-4qpqbR3_vdjHc-l75en85EGI3rDpPy4rE,4901
138
+ phone_agent/hdc/screenshot.py,sha256=sD21nGQidt9_S7nMXDZ3qMl8A2m7Q95nkwJD8vD-9Nc,4099
139
+ phone_agent/model/__init__.py,sha256=w6yFkla9VoUvX5vZkTWa7Z5Bg5_lfA9AajrLjZsdL24,149
140
+ phone_agent/model/client.py,sha256=vpkWoqJUETd4a9P2fQHJAMdhVDQoALyByb9bpTgDPfY,9707
141
+ phone_agent/xctest/__init__.py,sha256=AKIwp6UfoaNdDvDveAp_v5h5UQPxU-PQTefMpfayzj0,881
142
+ phone_agent/xctest/connection.py,sha256=m2lT9aNZgknDzItPoA7KOiFaDVOPqx4Imdhl6GcHx20,10833
143
+ phone_agent/xctest/device.py,sha256=sOAPMoliMIabcrtsmzuGDKnZXtitqlc0lo8RSjQta7A,13655
144
+ phone_agent/xctest/input.py,sha256=aMp1YCRGBtGsRMKhA9rrjxuLkri0_k3n6hisi0EBJ2o,8164
145
+ phone_agent/xctest/screenshot.py,sha256=H6dsQGgf38h8dvuSeigiR2Qr8NucPanIgTK8kjlV9hA,6027
146
+ mai_agent/base.py,sha256=hYjGQXe_qguYkGq2qVWVO_xzsLzNqayA-dox_usCCM0,4546
147
+ mai_agent/mai_grounding_agent.py,sha256=d1X6giinaFVnTtKwh7oFktGZCkDjUySLb22toItykxM,8664
148
+ mai_agent/mai_naivigation_agent.py,sha256=7oA_mjxHfUEYEz2-oLdCEIImNQvAK-V4ADIMCqPPM2s,18264
149
+ mai_agent/prompt.py,sha256=Zgnj5JmVhCGjgM0sB-q19q0G76OCcNdffLypmwxRdl8,7111
150
+ mai_agent/unified_memory.py,sha256=AqEux-0I2WUIQOaY_yCRi-MDuU1BuY6dEWgClQETnEk,2194
151
+ mai_agent/utils.py,sha256=oJ50FEOM13-NwPHY4ttQ0tH49GS-w2TEy1_1FMCaxLg,2304
152
+ scrcpy-server-v3.3.3,sha256=fnAyO6fyWWSd1KzOl6xP77roECssbZHi575hP9U1S-A,90164
153
+ autoglm_gui-1.5.0.dist-info/METADATA,sha256=iEzLUm032HxRlibY7Ixd56dE70Ocy9VBgrQZBf5QlR0,19861
154
+ autoglm_gui-1.5.0.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
155
+ autoglm_gui-1.5.0.dist-info/entry_points.txt,sha256=sz4rBO_kgrYmOiT0QnhCCv0b9QqBdWyCjugJgY8AEOI,58
156
+ autoglm_gui-1.5.0.dist-info/licenses/LICENSE,sha256=0IkSHDewdtmXnmYzTNq4U47EJYjTuhjQNpT0bZKuqWc,11342
157
+ autoglm_gui-1.5.0.dist-info/RECORD,,
mai_agent/base.py ADDED
@@ -0,0 +1,137 @@
1
+ # Copyright (c) 2025, Alibaba Cloud and its affiliates;
2
+ # Licensed under the Apache License, Version 2.0 (the "License");
3
+ # you may not use this file except in compliance with the License.
4
+ # You may obtain a copy of the License at
5
+ #
6
+ # http://www.apache.org/licenses/LICENSE-2.0
7
+ #
8
+ # Unless required by applicable law or agreed to in writing, software
9
+ # distributed under the License is distributed on an "AS IS" BASIS,
10
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11
+ # See the License for the specific language governing permissions and
12
+ # limitations under the License.
13
+
14
+ """Base agent class for mobile GUI automation agents."""
15
+
16
+ from abc import ABC, abstractmethod
17
+ from typing import Any, Dict, List, Tuple
18
+
19
+ from unified_memory import TrajMemory
20
+
21
+
22
class BaseAgent(ABC):
    """
    Shared foundation for GUI automation agents.

    The agent keeps its running trajectory in ``self.traj_memory`` and
    exposes read-only, per-step views over it. Concrete agents only have
    to implement :meth:`predict`.
    """

    def __init__(self) -> None:
        """Start with a blank trajectory: empty goal, empty id, no steps."""
        self.traj_memory = TrajMemory(task_goal="", task_id="", steps=[])

    @property
    def thoughts(self) -> List[str]:
        """Thought of every recorded step; a falsy thought becomes ``""``."""
        collected = []
        for entry in self.traj_memory.steps:
            collected.append(entry.thought or "")
        return collected

    @property
    def actions(self) -> List[Dict[str, Any]]:
        """Action of every recorded step, in step order."""
        collected = []
        for entry in self.traj_memory.steps:
            collected.append(entry.action)
        return collected

    @property
    def conclusions(self) -> List[str]:
        """Conclusion of every recorded step, in step order."""
        collected = []
        for entry in self.traj_memory.steps:
            collected.append(entry.conclusion)
        return collected

    @property
    def observations(self) -> List[Dict[str, Any]]:
        """
        Observation of every recorded step.

        Each item is a dict with the keys ``"screenshot"`` and
        ``"accessibility_tree"`` taken from the step.
        """
        collected = []
        for entry in self.traj_memory.steps:
            observation = {
                "screenshot": entry.screenshot_bytes,
                "accessibility_tree": entry.accessibility_tree,
            }
            collected.append(observation)
        return collected

    @property
    def history_images(self) -> List[bytes]:
        """Screenshot bytes of every recorded step, in step order."""
        collected = []
        for entry in self.traj_memory.steps:
            collected.append(entry.screenshot_bytes)
        return collected

    @property
    def history_responses(self) -> List[str]:
        """Prediction of every recorded step, in step order."""
        collected = []
        for entry in self.traj_memory.steps:
            collected.append(entry.prediction)
        return collected

    @abstractmethod
    def predict(
        self,
        instruction: str,
        obs: Dict[str, Any],
        **kwargs: Any,
    ) -> Tuple[str, Dict[str, Any]]:
        """
        Decide the next action for the current observation.

        Args:
            instruction: Task instruction/goal.
            obs: Current observation containing screenshot and optional
                accessibility tree.
            **kwargs: Additional keyword arguments.

        Returns:
            Tuple of (prediction_text, action_dict).
        """
        pass

    def reset(self) -> None:
        """Discard the current trajectory and start a blank one."""
        self.traj_memory = TrajMemory(task_goal="", task_id="", steps=[])

    def load_traj(self, traj_memory: TrajMemory) -> None:
        """
        Adopt an existing trajectory as this agent's memory.

        Args:
            traj_memory: TrajMemory object containing trajectory data. It is
                stored by reference, not copied.
        """
        self.traj_memory = traj_memory

    def save_traj(self) -> Dict[str, Any]:
        """
        Export the current trajectory as plain dictionaries.

        Returns:
            Dict with ``"task_goal"``, ``"task_id"`` and ``"steps"``; each
            step is a dict of the step's recorded fields. Field values are
            passed through unchanged.
        """
        memory = self.traj_memory
        exported_steps = [
            {
                "screenshot_bytes": entry.screenshot_bytes,
                "accessibility_tree": entry.accessibility_tree,
                "prediction": entry.prediction,
                "action": entry.action,
                "conclusion": entry.conclusion,
                "thought": entry.thought,
                "step_index": entry.step_index,
                "agent_type": entry.agent_type,
                "model_name": entry.model_name,
            }
            for entry in memory.steps
        ]

        return {
            "task_goal": memory.task_goal,
            "task_id": memory.task_id,
            "steps": exported_steps,
        }
@@ -0,0 +1,263 @@
1
+ # Copyright (c) 2025, Alibaba Cloud and its affiliates;
2
+ # Licensed under the Apache License, Version 2.0 (the "License");
3
+ # you may not use this file except in compliance with the License.
4
+ # You may obtain a copy of the License at
5
+ #
6
+ # http://www.apache.org/licenses/LICENSE-2.0
7
+ #
8
+ # Unless required by applicable law or agreed to in writing, software
9
+ # distributed under the License is distributed on an "AS IS" BASIS,
10
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11
+ # See the License for the specific language governing permissions and
12
+ # limitations under the License.
13
+
14
+ """
15
+ MAI Grounding Agent - A GUI grounding agent for locating UI elements.
16
+
17
+ This module provides the MAIGroundingAgent class that uses vision-language models
18
+ to locate UI elements based on natural language instructions.
19
+ """
20
+
21
+ import json
22
+ import re
23
+ from io import BytesIO
24
+ from typing import Any, Dict, Optional, Tuple, Union
25
+
26
+ from openai import OpenAI
27
+ from PIL import Image
28
+
29
+ from prompt import MAI_MOBILE_SYS_PROMPT_GROUNDING
30
+ from utils import pil_to_base64
31
+
32
+
33
+ # Constants
34
+ SCALE_FACTOR = 999
35
+
36
+
37
+ def parse_grounding_response(text: str) -> Dict[str, Any]:
38
+ """
39
+ Parse model output text containing grounding_think and answer tags.
40
+
41
+ Args:
42
+ text: Raw model output containing <grounding_think> and <answer> tags.
43
+
44
+ Returns:
45
+ Dictionary with keys:
46
+ - "thinking": The model's reasoning process
47
+ - "coordinate": Normalized [x, y] coordinate
48
+
49
+ Raises:
50
+ ValueError: If parsing fails or JSON is invalid.
51
+ """
52
+ text = text.strip()
53
+
54
+ result: Dict[str, Any] = {
55
+ "thinking": None,
56
+ "coordinate": None,
57
+ }
58
+
59
+ # Extract thinking content
60
+ think_pattern = r"<grounding_think>(.*?)</grounding_think>"
61
+ think_match = re.search(think_pattern, text, re.DOTALL)
62
+ if think_match:
63
+ result["thinking"] = think_match.group(1).strip()
64
+
65
+ # Extract answer content
66
+ answer_pattern = r"<answer>(.*?)</answer>"
67
+ answer_match = re.search(answer_pattern, text, re.DOTALL)
68
+ if answer_match:
69
+ answer_text = answer_match.group(1).strip()
70
+ try:
71
+ answer_json = json.loads(answer_text)
72
+ coordinates = answer_json.get("coordinate", [])
73
+ if len(coordinates) == 2:
74
+ # Normalize coordinates from SCALE_FACTOR range to [0, 1]
75
+ point_x = coordinates[0] / SCALE_FACTOR
76
+ point_y = coordinates[1] / SCALE_FACTOR
77
+ result["coordinate"] = [point_x, point_y]
78
+ else:
79
+ raise ValueError(
80
+ f"Invalid coordinate format: expected 2 values, got {len(coordinates)}"
81
+ )
82
+ except json.JSONDecodeError as e:
83
+ raise ValueError(f"Invalid JSON in answer: {e}")
84
+
85
+ return result
86
+
87
+
88
class MAIGroundingAgent:
    """
    Grounding agent that locates a UI element on a screenshot.

    The agent sends a screenshot plus a natural language instruction to a
    chat-completions endpoint and parses the reply into a coordinate.

    Attributes:
        llm_base_url: Base URL for the LLM API endpoint.
        model_name: Name of the model to use for predictions.
        runtime_conf: Effective configuration (defaults merged with the
            caller's overrides).
        llm: OpenAI client bound to ``llm_base_url``.
        temperature: Sampling temperature taken from ``runtime_conf``.
        top_k: Top-k sampling parameter taken from ``runtime_conf``.
        top_p: Top-p sampling parameter taken from ``runtime_conf``.
        max_tokens: Response token limit taken from ``runtime_conf``.
    """

    def __init__(
        self,
        llm_base_url: str,
        model_name: str,
        runtime_conf: Optional[Dict[str, Any]] = None,
    ):
        """
        Set up the client and the sampling configuration.

        Args:
            llm_base_url: Base URL for the LLM API endpoint.
            model_name: Name of the model to use.
            runtime_conf: Optional overrides. Keys read by this class:
                - temperature: Sampling temperature (default: 0.0)
                - top_k: Top-k sampling parameter (default: -1)
                - top_p: Top-p sampling parameter (default: 1.0)
                - max_tokens: Maximum tokens in response (default: 2048)
                Any other key is kept in ``runtime_conf`` but not read here.
        """
        self.llm_base_url = llm_base_url
        self.model_name = model_name

        # Caller-supplied values win over the defaults.
        merged_conf = {
            "temperature": 0.0,
            "top_k": -1,
            "top_p": 1.0,
            "max_tokens": 2048,
        }
        merged_conf.update(runtime_conf or {})
        self.runtime_conf = merged_conf

        self.llm = OpenAI(
            base_url=self.llm_base_url,
            api_key="empty",
        )

        # Mirror the frequently used settings as plain attributes.
        self.temperature = merged_conf["temperature"]
        self.top_k = merged_conf["top_k"]
        self.top_p = merged_conf["top_p"]
        self.max_tokens = merged_conf["max_tokens"]

    @property
    def system_prompt(self) -> str:
        """System prompt used for grounding requests."""
        return MAI_MOBILE_SYS_PROMPT_GROUNDING

    def _build_messages(
        self,
        instruction: str,
        image: Image.Image,
    ) -> list:
        """
        Assemble the chat messages for one grounding request.

        Args:
            instruction: Grounding instruction from user.
            image: PIL Image of the screenshot.

        Returns:
            Two message dicts: the system prompt, then a user message that
            carries the instruction text followed by the screenshot as a
            base64 data URL.
        """
        screenshot_b64 = pil_to_base64(image)

        system_message = {
            "role": "system",
            "content": [
                {
                    "type": "text",
                    "text": self.system_prompt,
                }
            ],
        }
        user_message = {
            "role": "user",
            "content": [
                {
                    "type": "text",
                    "text": instruction + "\n",
                },
                {
                    "type": "image_url",
                    "image_url": {"url": f"data:image/png;base64,{screenshot_b64}"},
                },
            ],
        }

        return [system_message, user_message]

    def predict(
        self,
        instruction: str,
        image: Union[Image.Image, bytes],
        **kwargs: Any,
    ) -> Tuple[str, Dict[str, Any]]:
        """
        Locate the UI element described by the instruction.

        The request is attempted up to three times; any exception raised by
        the API call or by response parsing triggers the next attempt.

        Args:
            instruction: Grounding instruction describing the UI element to locate.
            image: PIL Image or bytes of the screenshot.
            **kwargs: Additional arguments (unused).

        Returns:
            Tuple of (prediction_text, result_dict) where:
                - prediction_text: Raw model response, or "llm client error"
                  when every attempt failed
                - result_dict: Dictionary containing:
                    - "thinking": Model's reasoning process
                    - "coordinate": Normalized [x, y] coordinate
        """
        # Accept raw bytes as well as an already decoded image.
        if isinstance(image, bytes):
            image = Image.open(BytesIO(image))

        if image.mode != "RGB":
            image = image.convert("RGB")

        messages = self._build_messages(instruction, image)

        max_retries = 3
        for attempt in range(max_retries):
            try:
                response = self.llm.chat.completions.create(
                    model=self.model_name,
                    messages=messages,
                    max_tokens=self.max_tokens,
                    temperature=self.temperature,
                    top_p=self.top_p,
                    frequency_penalty=0.0,
                    presence_penalty=0.0,
                    extra_body={"repetition_penalty": 1.0, "top_k": self.top_k},
                    seed=42,
                )
                prediction = response.choices[0].message.content.strip()
                print(f"Raw response:\n{prediction}")

                result = parse_grounding_response(prediction)
                print(f"Parsed result:\n{result}")
                # First fully successful attempt ends the retry loop.
                return prediction, result

            except Exception as e:
                print(f"Error on attempt {attempt + 1}: {e}")

        # Every attempt raised: report the failure with an empty result.
        print("Max retry attempts reached, returning error flag.")
        return "llm client error", {"thinking": None, "coordinate": None}