realtimex-docs-server 0.1.4.dev5__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- realtimex_docs_server-0.1.4.dev5/.gitignore +18 -0
- realtimex_docs_server-0.1.4.dev5/COORDINATE_SCALING_IMPLEMENTATION.md +350 -0
- realtimex_docs_server-0.1.4.dev5/DYNAMIC_ELEMENT_VALIDATION_DESIGN.md +837 -0
- realtimex_docs_server-0.1.4.dev5/LICENSE +21 -0
- realtimex_docs_server-0.1.4.dev5/OCR_TEXT_VALIDATION_IMPLEMENTATION.md +578 -0
- realtimex_docs_server-0.1.4.dev5/PKG-INFO +42 -0
- realtimex_docs_server-0.1.4.dev5/README.md +29 -0
- realtimex_docs_server-0.1.4.dev5/pyproject.toml +31 -0
- realtimex_docs_server-0.1.4.dev5/smithery.yaml +24 -0
- realtimex_docs_server-0.1.4.dev5/src/realtimex_docs_server/__init__.py +5 -0
- realtimex_docs_server-0.1.4.dev5/src/realtimex_docs_server/__main__.py +3 -0
- realtimex_docs_server-0.1.4.dev5/src/realtimex_docs_server/docs/workflows/evn_invoice_download.md +87 -0
- realtimex_docs_server-0.1.4.dev5/src/realtimex_docs_server/docs/workflows/fpt_invoice_download.md +96 -0
- realtimex_docs_server-0.1.4.dev5/src/realtimex_docs_server/docs/workflows/imessage_send.md +58 -0
- realtimex_docs_server-0.1.4.dev5/src/realtimex_docs_server/server.py +162 -0
- realtimex_docs_server-0.1.4.dev5/system_prompt.md +51 -0
- realtimex_docs_server-0.1.4.dev5/system_prompt_imessage.md +36 -0
- realtimex_docs_server-0.1.4.dev5/test.html +21 -0
- realtimex_docs_server-0.1.4.dev5/uv.lock +1535 -0
|
@@ -0,0 +1,350 @@
|
|
|
1
|
+
# Coordinate Scaling Implementation Plan
|
|
2
|
+
|
|
3
|
+
## Objective
|
|
4
|
+
|
|
5
|
+
Modify `realtimex-pyautogui-server` to automatically scale coordinates from a fixed reference resolution to the current screen resolution, eliminating the need for agents to manually call `calculate_screen_coordinates`.
|
|
6
|
+
|
|
7
|
+
## Current vs. Proposed Approach
|
|
8
|
+
|
|
9
|
+
### Current (Complex)
|
|
10
|
+
```python
|
|
11
|
+
# Agent workflow
|
|
12
|
+
coords = calculate_screen_coordinates(0.260, 0.083) # Tool call 1
|
|
13
|
+
move_mouse(coords["x"], coords["y"]) # Tool call 2
|
|
14
|
+
```
|
|
15
|
+
|
|
16
|
+
### Proposed (Simple)
|
|
17
|
+
```python
|
|
18
|
+
# Agent workflow
|
|
19
|
+
move_mouse(500, 90) # Single tool call - scaling happens automatically
|
|
20
|
+
```
|
|
21
|
+
|
|
22
|
+
**Key Change:** `move_mouse` (and likewise `drag_mouse`) internally scales coordinates from the reference resolution to the current screen resolution.
|
|
23
|
+
|
|
24
|
+
---
|
|
25
|
+
|
|
26
|
+
## Implementation Details
|
|
27
|
+
|
|
28
|
+
### Location
|
|
29
|
+
**Repository:** `realtimex-pyautogui-server`
|
|
30
|
+
**File:** `src/realtimex_pyautogui_server/server.py` (or new helper module)
|
|
31
|
+
|
|
32
|
+
### Changes Required
|
|
33
|
+
|
|
34
|
+
#### 1. Add Internal Scaling Function
|
|
35
|
+
|
|
36
|
+
```python
|
|
37
|
+
def _scale_coordinates(
|
|
38
|
+
x: int,
|
|
39
|
+
y: int,
|
|
40
|
+
reference_width: int = 1920,
|
|
41
|
+
reference_height: int = 1080
|
|
42
|
+
) -> tuple[int, int]:
|
|
43
|
+
"""
|
|
44
|
+
Scale coordinates from reference resolution to current screen resolution.
|
|
45
|
+
|
|
46
|
+
Args:
|
|
47
|
+
x: X coordinate from reference screen
|
|
48
|
+
y: Y coordinate from reference screen
|
|
49
|
+
reference_width: Width of reference screen (default: 1920)
|
|
50
|
+
reference_height: Height of reference screen (default: 1080)
|
|
51
|
+
|
|
52
|
+
Returns:
|
|
53
|
+
Tuple of (scaled_x, scaled_y) for current screen
|
|
54
|
+
"""
|
|
55
|
+
current_screen = pyautogui.size()
|
|
56
|
+
|
|
57
|
+
scale_x = current_screen.width / reference_width
|
|
58
|
+
scale_y = current_screen.height / reference_height
|
|
59
|
+
|
|
60
|
+
scaled_x = int(x * scale_x)
|
|
61
|
+
scaled_y = int(y * scale_y)
|
|
62
|
+
|
|
63
|
+
return (scaled_x, scaled_y)
|
|
64
|
+
```
|
|
65
|
+
|
|
66
|
+
#### 2. Modify `move_mouse` Tool
|
|
67
|
+
|
|
68
|
+
**Before:**
|
|
69
|
+
```python
|
|
70
|
+
@mcp.tool()
|
|
71
|
+
def move_mouse(
|
|
72
|
+
x: int = Field(description="The x-coordinate on the screen to move the mouse to."),
|
|
73
|
+
y: int = Field(description="The y-coordinate on the screen to move the mouse to."),
|
|
74
|
+
) -> Dict[str, str]:
|
|
75
|
+
"""Move the mouse to the given coordinates."""
|
|
76
|
+
try:
|
|
77
|
+
pyautogui.moveTo(x, y)
|
|
78
|
+
return _success(f"Mouse moved to coordinates ({x}, {y}).")
|
|
79
|
+
except pyautogui.FailSafeException:
|
|
80
|
+
return _failure("Operation cancelled - mouse moved to screen corner (failsafe).")
|
|
81
|
+
except Exception as exc:
|
|
82
|
+
return _failure(f"Failed to move mouse: {exc}")
|
|
83
|
+
```
|
|
84
|
+
|
|
85
|
+
**After:**
|
|
86
|
+
```python
|
|
87
|
+
@mcp.tool()
|
|
88
|
+
def move_mouse(
|
|
89
|
+
x: int = Field(description="The x-coordinate from reference screen (1920×1080 by default)."),
|
|
90
|
+
y: int = Field(description="The y-coordinate from reference screen (1920×1080 by default)."),
|
|
91
|
+
) -> Dict[str, str]:
|
|
92
|
+
"""Move the mouse to the given coordinates, automatically scaling for current screen resolution."""
|
|
93
|
+
try:
|
|
94
|
+
# Get reference resolution from environment or use defaults
|
|
95
|
+
ref_width = int(os.getenv("REFERENCE_SCREEN_WIDTH", "1920"))
|
|
96
|
+
ref_height = int(os.getenv("REFERENCE_SCREEN_HEIGHT", "1080"))
|
|
97
|
+
|
|
98
|
+
# Scale coordinates to current screen
|
|
99
|
+
scaled_x, scaled_y = _scale_coordinates(x, y, ref_width, ref_height)
|
|
100
|
+
|
|
101
|
+
# Move to scaled position
|
|
102
|
+
pyautogui.moveTo(scaled_x, scaled_y)
|
|
103
|
+
|
|
104
|
+
return _success(f"Mouse moved to coordinates ({x}, {y}) [scaled to ({scaled_x}, {scaled_y})].")
|
|
105
|
+
except pyautogui.FailSafeException:
|
|
106
|
+
return _failure("Operation cancelled - mouse moved to screen corner (failsafe).")
|
|
107
|
+
except Exception as exc:
|
|
108
|
+
return _failure(f"Failed to move mouse: {exc}")
|
|
109
|
+
```
|
|
110
|
+
|
|
111
|
+
#### 3. Modify `drag_mouse` Tool (Similar Changes)
|
|
112
|
+
|
|
113
|
+
```python
|
|
114
|
+
@mcp.tool()
|
|
115
|
+
def drag_mouse(
|
|
116
|
+
x: int = Field(description="The x-coordinate to drag to (from reference screen)."),
|
|
117
|
+
y: int = Field(description="The y-coordinate to drag to (from reference screen)."),
|
|
118
|
+
duration: float = Field(default=0.5, ge=0.0, le=10.0, description="Duration of the drag in seconds."),
|
|
119
|
+
) -> Dict[str, str]:
|
|
120
|
+
"""Drag the mouse to a target location, automatically scaling for current screen resolution."""
|
|
121
|
+
try:
|
|
122
|
+
ref_width = int(os.getenv("REFERENCE_SCREEN_WIDTH", "1920"))
|
|
123
|
+
ref_height = int(os.getenv("REFERENCE_SCREEN_HEIGHT", "1080"))
|
|
124
|
+
|
|
125
|
+
scaled_x, scaled_y = _scale_coordinates(x, y, ref_width, ref_height)
|
|
126
|
+
|
|
127
|
+
pyautogui.dragTo(scaled_x, scaled_y, duration=duration)
|
|
128
|
+
return _success(f"Mouse dragged to ({x}, {y}) [scaled to ({scaled_x}, {scaled_y})] over {duration} seconds.")
|
|
129
|
+
except pyautogui.FailSafeException:
|
|
130
|
+
return _failure("Operation cancelled - mouse in screen corner (failsafe).")
|
|
131
|
+
except Exception as exc:
|
|
132
|
+
return _failure(f"Failed to drag mouse: {exc}")
|
|
133
|
+
```
|
|
134
|
+
|
|
135
|
+
---
|
|
136
|
+
|
|
137
|
+
## Configuration
|
|
138
|
+
|
|
139
|
+
### Environment Variables
|
|
140
|
+
|
|
141
|
+
Add to server configuration or Docker/systemd environment:
|
|
142
|
+
|
|
143
|
+
```bash
|
|
144
|
+
# Reference screen resolution (coordinates in system prompt are based on this)
|
|
145
|
+
REFERENCE_SCREEN_WIDTH=1920
|
|
146
|
+
REFERENCE_SCREEN_HEIGHT=1080
|
|
147
|
+
```
|
|
148
|
+
|
|
149
|
+
### Default Behavior
|
|
150
|
+
|
|
151
|
+
- **Default reference:** 1920×1080 (most common development resolution)
|
|
152
|
+
- **Auto-scaling:** Always enabled
|
|
153
|
+
- **Transparency:** Success messages show both original and scaled coordinates for debugging
|
|
154
|
+
|
|
155
|
+
---
|
|
156
|
+
|
|
157
|
+
## System Prompt Updates
|
|
158
|
+
|
|
159
|
+
### Before (Complex)
|
|
160
|
+
```markdown
|
|
161
|
+
| Element | Normalized Coords | Ref Screen | Description |
|
|
162
|
+
|---------|------------------|------------|-------------|
|
|
163
|
+
| address_bar | (0.260, 0.083) | 1920×1080 | Browser address bar |
|
|
164
|
+
|
|
165
|
+
**Usage:**
|
|
166
|
+
1. coords = calculate_screen_coordinates(0.260, 0.083)
|
|
167
|
+
2. move_mouse(coords["x"], coords["y"])
|
|
168
|
+
```
|
|
169
|
+
|
|
170
|
+
### After (Simple)
|
|
171
|
+
```markdown
|
|
172
|
+
Reference Screen: 1920×1080
|
|
173
|
+
|
|
174
|
+
| Element | Coordinates | Description |
|
|
175
|
+
|---------|------------|-------------|
|
|
176
|
+
| address_bar | (500, 90) | Browser address bar |
|
|
177
|
+
| search_button | (960, 540) | Center search button |
|
|
178
|
+
| settings_icon | (1824, 54) | Top-right settings |
|
|
179
|
+
|
|
180
|
+
**Usage:**
|
|
181
|
+
- move_mouse(500, 90) # Coordinates auto-scale to your screen
|
|
182
|
+
```
|
|
183
|
+
|
|
184
|
+
**Instructions for agents:**
|
|
185
|
+
```markdown
|
|
186
|
+
All coordinates in this document are based on a 1920×1080 reference screen.
|
|
187
|
+
The move_mouse and drag_mouse tools automatically scale these coordinates to your actual screen resolution.
|
|
188
|
+
Simply use the coordinates as documented - no calculation needed.
|
|
189
|
+
```
|
|
190
|
+
|
|
191
|
+
---
|
|
192
|
+
|
|
193
|
+
## Testing Plan
|
|
194
|
+
|
|
195
|
+
### Test Cases
|
|
196
|
+
|
|
197
|
+
1. **Same Resolution (1920×1080)**
|
|
198
|
+
- Input: `move_mouse(500, 90)`
|
|
199
|
+
- Expected: Mouse at (500, 90)
|
|
200
|
+
- Scaling: 1:1
|
|
201
|
+
|
|
202
|
+
2. **Higher Resolution (2560×1440)**
|
|
203
|
+
- Input: `move_mouse(500, 90)`
|
|
204
|
+
- Expected: Mouse at (666, 120)
|
|
205
|
+
- Scaling: ~1.33x both axes (500 × 1.333 = 666.67, truncated to 666 by `int()`)
|
|
206
|
+
|
|
207
|
+
3. **Lower Resolution (1366×768)**
|
|
208
|
+
- Input: `move_mouse(500, 90)`
|
|
209
|
+
- Expected: Mouse at (355, 64)
|
|
210
|
+
- Scaling: ~0.71x both axes
|
|
211
|
+
|
|
212
|
+
4. **Custom Reference (via env vars)**
|
|
213
|
+
- Set: `REFERENCE_SCREEN_WIDTH=2560`, `REFERENCE_SCREEN_HEIGHT=1440`
|
|
214
|
+
- Input: `move_mouse(1280, 720)`
|
|
215
|
+
- On 1920×1080: Expected (960, 540)
|
|
216
|
+
- Scaling: 0.75x both axes
|
|
217
|
+
|
|
218
|
+
5. **Drag Mouse**
|
|
219
|
+
- Same scaling logic applies
|
|
220
|
+
- Verify smooth dragging to scaled coordinates
|
|
221
|
+
|
|
222
|
+
---
|
|
223
|
+
|
|
224
|
+
## Migration Path
|
|
225
|
+
|
|
226
|
+
### Phase 1: Implementation
|
|
227
|
+
1. Add `_scale_coordinates()` helper function
|
|
228
|
+
2. Modify `move_mouse()` tool
|
|
229
|
+
3. Modify `drag_mouse()` tool
|
|
230
|
+
4. Add environment variable support
|
|
231
|
+
5. Update tool descriptions
|
|
232
|
+
|
|
233
|
+
### Phase 2: Documentation
|
|
234
|
+
1. Update README with new behavior
|
|
235
|
+
2. Document environment variables
|
|
236
|
+
3. Update system prompt templates
|
|
237
|
+
4. Add scaling examples
|
|
238
|
+
|
|
239
|
+
### Phase 3: Testing
|
|
240
|
+
1. Test on reference resolution (1920×1080)
|
|
241
|
+
2. Test on common resolutions (2560×1440, 1366×768)
|
|
242
|
+
3. Test with custom reference resolution
|
|
243
|
+
4. Verify success messages include scaled coordinates
|
|
244
|
+
|
|
245
|
+
### Phase 4: Rollout
|
|
246
|
+
1. Update `realtimex-pyautogui-server` package
|
|
247
|
+
2. Update system prompts to use absolute coordinates
|
|
248
|
+
3. Remove `calculate_screen_coordinates` from `realtimex-computer-use` (no longer needed)
|
|
249
|
+
4. Update agent documentation
|
|
250
|
+
|
|
251
|
+
---
|
|
252
|
+
|
|
253
|
+
## Benefits Summary
|
|
254
|
+
|
|
255
|
+
✅ **Simpler agent workflows** - One tool call instead of two
|
|
256
|
+
✅ **Lower token costs** - No extra tool call overhead
|
|
257
|
+
✅ **Less error-prone** - No chance of forgetting to scale
|
|
258
|
+
✅ **Easier documentation** - Absolute coordinates are more intuitive
|
|
259
|
+
✅ **Better maintainability** - Scaling logic where it belongs
|
|
260
|
+
✅ **Transparent debugging** - Success messages show both coordinates
|
|
261
|
+
✅ **Flexible configuration** - Environment variables for custom references
|
|
262
|
+
|
|
263
|
+
---
|
|
264
|
+
|
|
265
|
+
## Risks & Mitigations
|
|
266
|
+
|
|
267
|
+
### Risk: Breaking Existing Workflows
|
|
268
|
+
**Mitigation:** The change targets a new server (`realtimex-pyautogui-server`), so no existing production code is modified.
|
|
269
|
+
|
|
270
|
+
### Risk: Incorrect Scaling on Non-Standard Displays
|
|
271
|
+
**Mitigation:**
|
|
272
|
+
- Environment variables allow custom reference resolution
|
|
273
|
+
- Success messages show scaled coordinates for verification
|
|
274
|
+
- Test on multiple common resolutions
|
|
275
|
+
|
|
276
|
+
### Risk: Browser UI Chrome (Toolbars, Tabs) Doesn't Scale Linearly
|
|
277
|
+
**Mitigation:**
|
|
278
|
+
- Document that browser UI elements may need testing/adjustment
|
|
279
|
+
- Future enhancement: Window-relative coordinates mode
|
|
280
|
+
- Suggest testing coordinates on target resolutions
|
|
281
|
+
|
|
282
|
+
---
|
|
283
|
+
|
|
284
|
+
## Alternative Considered: Keep Both Approaches
|
|
285
|
+
|
|
286
|
+
**Option:** Provide both `move_mouse` (auto-scaling) and `move_mouse_absolute` (no scaling)
|
|
287
|
+
|
|
288
|
+
**Decision:** Not recommended
|
|
289
|
+
- Adds complexity
|
|
290
|
+
- Agents might use wrong tool
|
|
291
|
+
- Two ways to do the same thing
|
|
292
|
+
- Auto-scaling should work for 99% of cases
|
|
293
|
+
|
|
294
|
+
---
|
|
295
|
+
|
|
296
|
+
## Future Enhancements
|
|
297
|
+
|
|
298
|
+
**Phase 2: Window-Relative Mode**
|
|
299
|
+
```python
|
|
300
|
+
move_mouse(x, y, relative_to="active_window")
|
|
301
|
+
```
|
|
302
|
+
Calculate coordinates relative to active window instead of screen.
|
|
303
|
+
|
|
304
|
+
**Phase 3: Multi-Monitor Support**
|
|
305
|
+
```python
|
|
306
|
+
move_mouse(x, y, monitor=1)
|
|
307
|
+
```
|
|
308
|
+
Support coordinate scaling across multiple monitors.
|
|
309
|
+
|
|
310
|
+
**Phase 4: Coordinate Validation**
|
|
311
|
+
```python
|
|
312
|
+
validate_coordinates(x, y) # Tool to check if coordinates are on-screen
|
|
313
|
+
```
|
|
314
|
+
|
|
315
|
+
---
|
|
316
|
+
|
|
317
|
+
## Implementation Checklist
|
|
318
|
+
|
|
319
|
+
### Code Changes (realtimex-pyautogui-server)
|
|
320
|
+
- [ ] Add `_scale_coordinates()` helper function
|
|
321
|
+
- [ ] Modify `move_mouse()` to use scaling
|
|
322
|
+
- [ ] Modify `drag_mouse()` to use scaling
|
|
323
|
+
- [ ] Add environment variable support (REFERENCE_SCREEN_WIDTH/HEIGHT)
|
|
324
|
+
- [ ] Update tool descriptions
|
|
325
|
+
- [ ] Add scaled coordinates to success messages
|
|
326
|
+
|
|
327
|
+
### Documentation
|
|
328
|
+
- [ ] Update README with scaling behavior
|
|
329
|
+
- [ ] Document environment variables
|
|
330
|
+
- [ ] Add usage examples
|
|
331
|
+
- [ ] Create migration guide
|
|
332
|
+
|
|
333
|
+
### Testing
|
|
334
|
+
- [ ] Test on 1920×1080 (reference)
|
|
335
|
+
- [ ] Test on 2560×1440 (higher res)
|
|
336
|
+
- [ ] Test on 1366×768 (lower res)
|
|
337
|
+
- [ ] Test with custom reference resolution
|
|
338
|
+
- [ ] Verify drag_mouse scaling
|
|
339
|
+
|
|
340
|
+
### Cleanup
|
|
341
|
+
- [ ] Remove `calculate_screen_coordinates` from realtimex-computer-use (optional)
|
|
342
|
+
- [ ] Update COORDINATE_SCALING_DESIGN.md to reflect new approach
|
|
343
|
+
- [ ] Update system prompt templates
|
|
344
|
+
|
|
345
|
+
---
|
|
346
|
+
|
|
347
|
+
**Document Version:** 1.0
|
|
348
|
+
**Author:** RTA
|
|
349
|
+
**Status:** Proposed - Ready for Review and Implementation
|
|
350
|
+
**Delegation Target:** `realtimex-pyautogui-server` repository
|