kl-mcp-client 2.1.12__tar.gz → 2.1.13__tar.gz

This diff shows the changes between two publicly available versions of a package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: kl-mcp-client
3
- Version: 2.1.12
3
+ Version: 2.1.13
4
4
  Summary: MCP Client for Python
5
5
  Author-email: Kyle <hngan.it@gmail.com>
6
6
  License: MIT
@@ -1,373 +1,300 @@
1
1
  # async_tools.py
2
+ import logging
2
3
  from typing import Any, Dict, Optional
3
4
 
4
5
  from .client import MCPClient
5
6
 
7
+ # ======================================================
8
+ # LOGGER
9
+ # ======================================================
10
+
11
+ logger = logging.getLogger(__name__)
12
+
13
+ # ======================================================
14
+ # CLASS
15
+ # ======================================================
16
+
6
17
 
7
18
  class MCPTools:
8
19
  """
9
- Async wrapper cho Google ADK + MCP Server.
10
- - Dùng MCPClientAsync (httpx async)
11
- - Tất cả method đều async
12
- - Screenshot trả về đúng format ADK Web yêu cầu
20
+ Async wrapper cho Google ADK + MCP Server
13
21
  """
14
22
 
23
+ # ======================================================
24
+ # INIT
25
+ # ======================================================
26
+
15
27
  def __init__(self, client: MCPClient):
16
28
  self.client = client
29
+ logger.info("MCPTools initialized")
30
+
31
+ # ======================================================
32
+ # BROWSER RUNTIME (NO SESSION)
33
+ # ======================================================
34
+
35
+ async def create_browser(self, payload: Optional[Dict[str, Any]] = None):
36
+ logger.info("create_browser")
37
+ res = await self.client.call_tool("createBrowser", payload or {})
38
+ return res.get("structuredContent", {})
39
+
40
+ async def release_browser(self, pop_name: str):
41
+ logger.info("release_browser | %s", pop_name)
42
+ res = await self.client.call_tool(
43
+ "releaseBrowser", {"pod_name": pop_name}
44
+ )
45
+ return res.get("structuredContent", {})
17
46
 
18
47
  # ======================================================
19
48
  # SESSION MANAGEMENT
20
49
  # ======================================================
21
50
 
22
51
  async def create_session(self, cdpUrl: str) -> Dict[str, Any]:
52
+ logger.info("create_session | %s", cdpUrl)
23
53
  sid = await self.client.create_session(cdpUrl)
24
54
  return {"sessionId": sid}
25
55
 
26
56
  async def close_session(self, sessionId: str) -> Dict[str, Any]:
57
+ logger.info("close_session | %s", sessionId)
27
58
  ok = await self.client.close_session(sessionId)
28
59
  return {"ok": bool(ok)}
29
60
 
30
61
  async def list_sessions(self) -> Dict[str, Any]:
62
+ logger.debug("list_sessions")
31
63
  return {"sessions": self.client.list_local_sessions()}
32
64
 
33
65
  # ======================================================
34
- # NAVIGATION & DOM
66
+ # TAB MANAGEMENT
35
67
  # ======================================================
36
68
 
37
- async def open_page(self, sessionId: str, url: str) -> Dict[str, Any]:
69
+ async def new_tab(self, sessionId: str, url: str = "about:blank"):
70
+ logger.info("new_tab | %s", url)
38
71
  res = await self.client.call_tool(
39
- "openPage", {"sessionId": sessionId, "url": url}
72
+ "newTab", {"sessionId": sessionId, "url": url}
40
73
  )
41
74
  return res.get("structuredContent", {})
42
75
 
43
- async def get_html(self, sessionId: str) -> Dict[str, Any]:
44
- res = await self.client.call_tool("getHTML", {"sessionId": sessionId})
76
+ async def close_tab(self, sessionId: str, tabId: str):
77
+ logger.info("close_tab | %s", tabId)
78
+ res = await self.client.call_tool(
79
+ "closeTab", {"sessionId": sessionId, "tabId": tabId}
80
+ )
45
81
  return res.get("structuredContent", {})
46
82
 
47
- async def screenshot(self, sessionId: str) -> Dict[str, Any]:
48
- """
49
- ADK Web expects:
50
- {
51
- "type": "image",
52
- "mimeType": "image/png",
53
- "data": "<base64>"
54
- }
55
- """
56
- res = await self.client.call_tool("screenshot", {"sessionId": sessionId})
57
- return res["content"][0]
83
+ async def switch_tab(self, sessionId: str, targetId: str):
84
+ logger.info("switch_tab | %s", targetId)
85
+ res = await self.client.call_tool(
86
+ "switchTab", {"sessionId": sessionId, "targetId": targetId}
87
+ )
88
+ return res.get("structuredContent", {})
58
89
 
59
- async def click(self, sessionId: str, selector: str) -> Dict[str, Any]:
90
+ async def current_tab(self, sessionId: str):
91
+ logger.debug("current_tab")
60
92
  res = await self.client.call_tool(
61
- "click", {"sessionId": sessionId, "selector": selector}
93
+ "currentTab", {"sessionId": sessionId}
62
94
  )
63
95
  return res.get("structuredContent", {})
64
96
 
65
- async def type(self, sessionId: str, selector: str, text: str) -> Dict[str, Any]:
97
+ # ======================================================
98
+ # NAVIGATION & DOM
99
+ # ======================================================
100
+
101
+ async def open_page(self, sessionId: str, url: str):
102
+ logger.info("open_page | %s", url)
66
103
  res = await self.client.call_tool(
67
- "type", {"sessionId": sessionId,
68
- "selector": selector, "text": text}
104
+ "openPage", {"sessionId": sessionId, "url": url}
69
105
  )
70
106
  return res.get("structuredContent", {})
71
107
 
72
- async def evaluate(self, sessionId: str, expression: str) -> Dict[str, Any]:
108
+ async def get_html(self, sessionId: str):
109
+ logger.debug("get_html")
110
+ res = await self.client.call_tool("getHTML", {"sessionId": sessionId})
111
+ return res.get("structuredContent", {})
112
+
113
+ async def evaluate(self, sessionId: str, expression: str):
114
+ logger.debug("evaluate")
73
115
  res = await self.client.call_tool(
74
116
  "evaluate", {"sessionId": sessionId, "expression": expression}
75
117
  )
76
118
  return res.get("structuredContent", {})
77
119
 
120
+ async def screenshot(self, sessionId: str):
121
+ logger.info("screenshot")
122
+ res = await self.client.call_tool("screenshot", {"sessionId": sessionId})
123
+ return res["content"][0]
124
+
125
+ async def wait_for_selector(
126
+ self, sessionId: str, selector: str, timeoutMs: Optional[int] = None
127
+ ):
128
+ logger.debug("wait_for_selector | %s", selector)
129
+ args = {"sessionId": sessionId, "selector": selector}
130
+ if timeoutMs:
131
+ args["timeoutMs"] = int(timeoutMs)
132
+
133
+ res = await self.client.call_tool("waitForSelector", args)
134
+ return res.get("structuredContent", {})
135
+
78
136
  # ======================================================
79
137
  # ELEMENT UTILITIES
80
138
  # ======================================================
81
139
 
82
- async def find_element(self, sessionId: str, selector: str) -> Dict[str, Any]:
140
+ async def find_element(self, sessionId: str, selector: str):
141
+ logger.debug("find_element | %s", selector)
83
142
  res = await self.client.call_tool(
84
143
  "findElement", {"sessionId": sessionId, "selector": selector}
85
144
  )
86
145
  return res.get("structuredContent", {})
87
146
 
88
- async def find_all(self, sessionId: str, selector: str) -> Dict[str, Any]:
147
+ async def find_all(self, sessionId: str, selector: str):
148
+ logger.debug("find_all | %s", selector)
89
149
  res = await self.client.call_tool(
90
150
  "findAll", {"sessionId": sessionId, "selector": selector}
91
151
  )
92
152
  return res.get("structuredContent", {})
93
153
 
94
- async def get_bounding_box(self, sessionId: str, selector: str) -> Dict[str, Any]:
154
+ async def get_bounding_box(self, sessionId: str, selector: str):
155
+ logger.debug("get_bounding_box | %s", selector)
95
156
  res = await self.client.call_tool(
96
157
  "getBoundingBox", {"sessionId": sessionId, "selector": selector}
97
158
  )
98
159
  return res.get("structuredContent", {})
99
160
 
100
- async def click_bounding_box(self, sessionId: str, selector: str) -> Dict[str, Any]:
161
+ async def click_bounding_box(self, sessionId: str, selector: str):
162
+ logger.debug("click_bounding_box | %s", selector)
101
163
  res = await self.client.call_tool(
102
164
  "clickBoundingBox", {"sessionId": sessionId, "selector": selector}
103
165
  )
104
166
  return res.get("structuredContent", {})
105
167
 
106
- async def upload_file(
107
- self, sessionId: str, selector: str, filename: str, base64data: str
108
- ) -> Dict[str, Any]:
109
- res = await self.client.call_tool(
110
- "uploadFile",
111
- {
112
- "sessionId": sessionId,
113
- "selector": selector,
114
- "filename": filename,
115
- "data": base64data,
116
- },
117
- )
118
- return res.get("structuredContent", {})
119
-
120
- async def wait_for_selector(
121
- self, sessionId: str, selector: str, timeoutMs: Optional[int] = None
122
- ) -> Dict[str, Any]:
123
- args = {"sessionId": sessionId, "selector": selector}
124
- if timeoutMs is not None:
125
- args["timeoutMs"] = int(timeoutMs)
126
-
127
- res = await self.client.call_tool("waitForSelector", args)
128
- return res.get("structuredContent", {})
129
-
130
168
  # ======================================================
131
- # TAB MANAGEMENT
169
+ # ADVANCED FIND / CLICK
132
170
  # ======================================================
133
171
 
134
- async def new_tab(
135
- self, sessionId: str, url: Optional[str] = "about:blank"
136
- ) -> Dict[str, Any]:
172
+ async def click(self, sessionId: str, selector: str):
173
+ logger.debug("click | %s", selector)
137
174
  res = await self.client.call_tool(
138
- "newTab", {"sessionId": sessionId, "url": url}
175
+ "click", {"sessionId": sessionId, "selector": selector}
139
176
  )
140
177
  return res.get("structuredContent", {})
141
178
 
142
- async def switch_tab(self, sessionId: str, targetId: str) -> Dict[str, Any]:
179
+ async def type(self, sessionId: str, selector: str, text: str):
180
+ logger.debug("type | %s", selector)
143
181
  res = await self.client.call_tool(
144
- "switchTab", {"sessionId": sessionId, "targetId": targetId}
182
+ "type",
183
+ {"sessionId": sessionId, "selector": selector, "text": text},
145
184
  )
146
185
  return res.get("structuredContent", {})
147
186
 
148
- # ======================================================
149
- # ADVANCED ACTIONS
150
- # ======================================================
151
-
152
- async def click_to_text(self, sessionId: str, text: str) -> Dict[str, Any]:
187
+ async def click_to_text(self, sessionId: str, text: str):
188
+ logger.debug("click_to_text | %s", text)
153
189
  res = await self.client.call_tool(
154
190
  "clickToText", {"sessionId": sessionId, "text": text}
155
191
  )
156
192
  return res.get("structuredContent", {})
157
193
 
158
- async def find_element_xpath(self, sessionId: str, xpath: str) -> Dict[str, Any]:
194
+ async def find_element_xpath(self, sessionId: str, xpath: str):
195
+ logger.debug("find_element_xpath")
159
196
  res = await self.client.call_tool(
160
197
  "findElementByXPath", {"sessionId": sessionId, "xpath": xpath}
161
198
  )
162
199
  return res.get("structuredContent", {})
163
200
 
164
- async def find_element_by_text(self, sessionId: str, text: str) -> Dict[str, Any]:
201
+ async def find_element_by_text(self, sessionId: str, text: str):
202
+ logger.debug("find_element_by_text | %s", text)
165
203
  res = await self.client.call_tool(
166
204
  "findElementByText", {"sessionId": sessionId, "text": text}
167
205
  )
168
206
  return res.get("structuredContent", {})
169
207
 
170
- async def click_by_node_id(self, sessionId: str, nodeId: int) -> Dict[str, Any]:
208
+ async def click_by_node_id(self, sessionId: str, nodeId: int):
209
+ logger.debug("click_by_node_id | %s", nodeId)
171
210
  res = await self.client.call_tool(
172
211
  "clickByNodeId", {"sessionId": sessionId, "nodeId": nodeId}
173
212
  )
174
213
  return res.get("structuredContent", {})
175
214
 
176
- async def import_cookies(self, sessionId: str, cookies: dict) -> Dict[str, Any]:
215
+ async def upload_file(
216
+ self, sessionId: str, selector: str, filename: str, base64data: str
217
+ ):
218
+ logger.info("upload_file | %s", filename)
177
219
  res = await self.client.call_tool(
178
- "importCookies", {"sessionId": sessionId, "cookies": cookies}
220
+ "uploadFile",
221
+ {
222
+ "sessionId": sessionId,
223
+ "selector": selector,
224
+ "filename": filename,
225
+ "data": base64data,
226
+ },
179
227
  )
180
228
  return res.get("structuredContent", {})
181
229
 
182
- async def get_dom_tree(self, sessionId: str, args: Optional[dict] = None):
183
- return await self.client.call_tool(
184
- "getDomTree", {"sessionId": sessionId, "args": args or {}}
185
- )
186
-
187
- async def get_clickable(self, sessionId: str, args: Optional[dict] = None):
188
- return await self.client.call_tool(
189
- "getClickable", {"sessionId": sessionId, "args": args or {}}
190
- )
191
-
192
- async def selector_map(
193
- self, sessionId: str, selector: str, args: Optional[dict] = None
194
- ):
195
- return await self.client.call_tool(
196
- "selectorMap",
197
- {"sessionId": sessionId, "selector": selector, "args": args or {}},
230
+ async def import_cookies(self, sessionId: str, cookies: dict):
231
+ logger.info("import_cookies")
232
+ res = await self.client.call_tool(
233
+ "importCookies", {"sessionId": sessionId, "cookies": cookies}
198
234
  )
235
+ return res.get("structuredContent", {})
199
236
 
200
237
  # ======================================================
201
- # AI / CONTENT PARSING
238
+ # KEYBOARD
202
239
  # ======================================================
203
240
 
204
- async def parse_html_by_prompt(self, html: str, prompt: str) -> Dict[str, Any]:
205
- res = await self.client.call_tool(
206
- "parseHTMLByPrompt",
207
- {"html": html, "prompt": prompt},
241
+ async def send_key(self, sessionId: str, key: str):
242
+ logger.debug("send_key | %s", key)
243
+ return await self.client.call_tool(
244
+ "sendKey", {"sessionId": sessionId, "key": key}
208
245
  )
209
- return res.get("structuredContent", {})
210
246
 
211
247
  # ======================================================
212
- # MOUSE / PERFORM ACTIONS
248
+ # MOUSE / PERFORM
213
249
  # ======================================================
214
250
 
215
- async def perform_click_xy(
216
- self,
217
- sessionId: str,
218
- x: float,
219
- y: float,
220
- ) -> Dict[str, Any]:
221
- """
222
- Move mouse smoothly to (x, y) and left click.
223
- """
224
- res = await self.client.call_tool(
251
+ async def perform_click_xy(self, sessionId: str, x: float, y: float):
252
+ logger.debug("perform_click_xy | %s,%s", x, y)
253
+ return await self.client.call_tool(
225
254
  "perform",
226
- {
227
- "sessionId": sessionId,
228
- "action": "click",
229
- "x": float(x),
230
- "y": float(y),
231
- },
255
+ {"sessionId": sessionId, "action": "click", "x": x, "y": y},
232
256
  )
233
- return res.get("structuredContent", {})
234
257
 
235
258
  async def perform_drag(
236
- self,
237
- sessionId: str,
238
- from_x: float,
239
- from_y: float,
240
- to_x: float,
241
- to_y: float,
242
- ) -> Dict[str, Any]:
243
- """
244
- Drag mouse from (from_x, from_y) to (to_x, to_y).
245
- """
246
- res = await self.client.call_tool(
259
+ self, sessionId: str, from_x: float, from_y: float, to_x: float, to_y: float
260
+ ):
261
+ logger.debug("perform_drag")
262
+ return await self.client.call_tool(
247
263
  "perform",
248
264
  {
249
265
  "sessionId": sessionId,
250
266
  "action": "drag",
251
- "from": {"x": float(from_x), "y": float(from_y)},
252
- "to": {"x": float(to_x), "y": float(to_y)},
267
+ "from": {"x": from_x, "y": from_y},
268
+ "to": {"x": to_x, "y": to_y},
253
269
  },
254
270
  )
255
- return res.get("structuredContent", {})
256
271
 
257
- async def perform_hover(
258
- self,
259
- sessionId: str,
260
- x: float,
261
- y: float,
262
- ) -> Dict[str, Any]:
263
- """
264
- Move mouse smoothly to (x, y) without clicking.
265
- """
266
- res = await self.client.call_tool(
272
+ async def perform_hover(self, sessionId: str, x: float, y: float):
273
+ logger.debug("perform_hover | %s,%s", x, y)
274
+ return await self.client.call_tool(
267
275
  "perform",
268
- {
269
- "sessionId": sessionId,
270
- "action": "hover",
271
- "x": float(x),
272
- "y": float(y),
273
- },
274
- )
275
- return res.get("structuredContent", {})
276
-
277
- # CLEAN TEXT / READ MODE
278
- # ======================================================
279
- async def get_clean_text(self, sessionId: str) -> Dict[str, Any]:
280
- """
281
- Lấy toàn bộ visible text đã được clean trên trang hiện tại.
282
- - Bỏ script/style/iframe/svg/canvas
283
- - Chỉ text nhìn thấy (display/visibility/opacity)
284
-
285
- Returns:
286
- {
287
- "text": "...",
288
- "length": 12345
289
- }
290
- """
291
- res = await self.client.call_tool(
292
- "getCleanText",
293
- {"sessionId": sessionId},
294
- )
295
- return res.get("structuredContent", {})
296
-
297
- # ======================================================
298
- # KEYBOARD (ASYNC)
299
- # ======================================================
300
- async def send_key(
301
- self,
302
- sessionId: str,
303
- key: str,
304
- ) -> Dict[str, Any]:
305
- """
306
- Send a keyboard key to the active page (async).
307
-
308
- Args:
309
- sessionId: MCP browser session
310
- key: Keyboard key (e.g. "Enter", "Tab", "Escape", "ArrowDown",
311
- "Ctrl+a", "Ctrl+Enter")
312
-
313
- Returns:
314
- structuredContent from MCP server
315
- """
316
- return await self.call_tool_structured(
317
- "sendKey",
318
- {
319
- "sessionId": sessionId,
320
- "key": key,
321
- },
276
+ {"sessionId": sessionId, "action": "hover", "x": x, "y": y},
322
277
  )
323
278
 
324
279
  # ======================================================
325
- # BROWSER RUNTIME (NO SESSION)
280
+ # CLEAN TEXT
326
281
  # ======================================================
327
282
 
328
- async def create_browser(
329
- self,
330
- payload: Optional[Dict[str, Any]] = None,
331
- ) -> Dict[str, Any]:
332
- """
333
- Start browser runtime (idempotent).
334
- - KHÔNG tạo session
335
- - Dùng cho warm-up / admin / scheduler
336
-
337
- payload: ThirdPartyOpenRequest (optional)
338
- """
283
+ async def get_clean_text(self, sessionId: str):
284
+ logger.debug("get_clean_text")
339
285
  res = await self.client.call_tool(
340
- "createBrowser",
341
- payload or {},
286
+ "getCleanText", {"sessionId": sessionId}
342
287
  )
343
288
  return res.get("structuredContent", {})
344
289
 
345
- async def release_browser(self, pop_name: str) -> Dict[str, Any]:
346
- """
347
- Release / stop browser runtime.
348
- - KHÔNG cần payload
349
- """
350
- res = await self.client.call_tool(
351
- "releaseBrowser",
352
- {
353
- "pod_name": pop_name
354
- },
355
- )
356
- return res.get("structuredContent", {})
357
290
  # ======================================================
358
- # VIEWPORT (Playwright-style, ASYNC)
291
+ # VIEWPORT
359
292
  # ======================================================
360
293
 
361
- async def get_viewport(self, sessionId: str) -> Dict[str, Any]:
362
- """
363
- Get current browser viewport.
364
- Equivalent to Playwright page.viewportSize().
365
- """
294
+ async def get_viewport(self, sessionId: str):
295
+ logger.debug("get_viewport")
366
296
  res = await self.client.call_tool(
367
- "viewport",
368
- {
369
- "sessionId": sessionId,
370
- },
297
+ "viewport", {"sessionId": sessionId}
371
298
  )
372
299
  return res.get("structuredContent", {})
373
300
 
@@ -379,46 +306,30 @@ class MCPTools:
379
306
  height: int,
380
307
  deviceScaleFactor: float = 1.0,
381
308
  mobile: bool = False,
382
- ) -> Dict[str, Any]:
383
- """
384
- Set browser viewport (Playwright-like).
385
-
386
- Args:
387
- width: viewport width
388
- height: viewport height
389
- deviceScaleFactor: default 1.0
390
- mobile: mobile emulation flag
391
- """
309
+ ):
310
+ logger.info("set_viewport | %sx%s", width, height)
392
311
  res = await self.client.call_tool(
393
312
  "viewport",
394
313
  {
395
314
  "sessionId": sessionId,
396
315
  "viewport": {
397
- "width": int(width),
398
- "height": int(height),
399
- "deviceScaleFactor": float(deviceScaleFactor),
400
- "mobile": bool(mobile),
316
+ "width": width,
317
+ "height": height,
318
+ "deviceScaleFactor": deviceScaleFactor,
319
+ "mobile": mobile,
401
320
  },
402
321
  },
403
322
  )
404
323
  return res.get("structuredContent", {})
324
+
405
325
  # ======================================================
406
- # CURRENT TAB
326
+ # AI / PARSING
407
327
  # ======================================================
408
328
 
409
- async def current_tab(self, sessionId: str) -> Dict[str, Any]:
410
- """
411
- Get current active browser tab ID.
412
-
413
- Returns:
414
- {
415
- "tabId": "<targetId>"
416
- }
417
- """
329
+ async def parse_html_by_prompt(self, html: str, prompt: str):
330
+ logger.info("parse_html_by_prompt")
418
331
  res = await self.client.call_tool(
419
- "currentTab",
420
- {
421
- "sessionId": sessionId,
422
- },
332
+ "parseHTMLByPrompt",
333
+ {"html": html, "prompt": prompt},
423
334
  )
424
335
  return res.get("structuredContent", {})