lumivor 0.1.7__py3-none-any.whl
Sign up to get free protection for your applications and to get access to all the features.
- lumivor/README.md +51 -0
- lumivor/__init__.py +25 -0
- lumivor/agent/message_manager/service.py +252 -0
- lumivor/agent/message_manager/tests.py +246 -0
- lumivor/agent/message_manager/views.py +37 -0
- lumivor/agent/prompts.py +208 -0
- lumivor/agent/service.py +1017 -0
- lumivor/agent/tests.py +204 -0
- lumivor/agent/views.py +272 -0
- lumivor/browser/browser.py +208 -0
- lumivor/browser/context.py +993 -0
- lumivor/browser/tests/screenshot_test.py +38 -0
- lumivor/browser/tests/test_clicks.py +77 -0
- lumivor/browser/views.py +48 -0
- lumivor/controller/registry/service.py +140 -0
- lumivor/controller/registry/views.py +71 -0
- lumivor/controller/service.py +557 -0
- lumivor/controller/views.py +47 -0
- lumivor/dom/__init__.py +0 -0
- lumivor/dom/buildDomTree.js +428 -0
- lumivor/dom/history_tree_processor/service.py +112 -0
- lumivor/dom/history_tree_processor/view.py +33 -0
- lumivor/dom/service.py +100 -0
- lumivor/dom/tests/extraction_test.py +44 -0
- lumivor/dom/tests/process_dom_test.py +40 -0
- lumivor/dom/views.py +187 -0
- lumivor/logging_config.py +128 -0
- lumivor/telemetry/service.py +114 -0
- lumivor/telemetry/views.py +51 -0
- lumivor/utils.py +54 -0
- lumivor-0.1.7.dist-info/METADATA +100 -0
- lumivor-0.1.7.dist-info/RECORD +34 -0
- lumivor-0.1.7.dist-info/WHEEL +4 -0
- lumivor-0.1.7.dist-info/licenses/LICENSE +21 -0
@@ -0,0 +1,428 @@
|
|
1
|
+
(
|
2
|
+
doHighlightElements = true
|
3
|
+
) => {
|
4
|
+
let highlightIndex = 0; // Reset highlight index
|
5
|
+
|
6
|
+
/**
 * Draw a coloured outline and a numeric label over `element`.
 *
 * All overlays live in a single fixed, click-through container
 * (`#playwright-highlight-container`) that is created on first use and
 * appended to the document element.
 *
 * @param {Element} element - Element to outline.
 * @param {number} index - Highlight index; shown in the label and used to pick a colour.
 * @param {?Element} parentIframe - Iframe hosting `element`, if any; its
 *     client rect is added to the element's so the overlay lands at the right
 *     place in the top-level viewport.
 * @returns {number} `index + 1`.
 */
function highlightElement(element, index, parentIframe = null) {
    const CONTAINER_ID = 'playwright-highlight-container';
    const LABEL_WIDTH = 20; // Approximate rendered label width, in px
    const LABEL_HEIGHT = 16; // Approximate rendered label height, in px

    // Lazily create the shared overlay container.
    let container = document.getElementById(CONTAINER_ID);
    if (!container) {
        container = document.createElement('div');
        container.id = CONTAINER_ID;
        Object.assign(container.style, {
            position: 'fixed',
            pointerEvents: 'none',
            top: '0',
            left: '0',
            width: '100%',
            height: '100%',
            zIndex: '2147483647', // Maximum z-index value
        });
        document.documentElement.appendChild(container);
    }

    // Colours cycle with the index so neighbouring highlights differ.
    const palette = [
        '#FF0000', '#00FF00', '#0000FF', '#FFA500',
        '#800080', '#008080', '#FF69B4', '#4B0082',
        '#FF4500', '#2E8B57', '#DC143C', '#4682B4',
    ];
    const baseColor = palette[index % palette.length];

    // Element position in viewport coordinates, shifted by the iframe origin if nested.
    const rect = element.getBoundingClientRect();
    let top = rect.top;
    let left = rect.left;
    if (parentIframe) {
        const frameRect = parentIframe.getBoundingClientRect();
        top += frameRect.top;
        left += frameRect.left;
    }

    const overlay = document.createElement('div');
    Object.assign(overlay.style, {
        position: 'absolute',
        border: `2px solid ${baseColor}`,
        backgroundColor: `${baseColor}1A`, // 10% opacity version of the colour
        pointerEvents: 'none',
        boxSizing: 'border-box',
        top: `${top}px`,
        left: `${left}px`,
        width: `${rect.width}px`,
        height: `${rect.height}px`,
    });

    // Label sits in the top-right corner inside the box, or just above the
    // box when the box is too small to contain it.
    const boxTooSmall = rect.width < LABEL_WIDTH + 4 || rect.height < LABEL_HEIGHT + 4;
    let labelTop = boxTooSmall ? top - LABEL_HEIGHT - 2 : top + 2;
    let labelLeft = boxTooSmall
        ? left + rect.width - LABEL_WIDTH
        : left + rect.width - LABEL_WIDTH - 2;

    // Pull the label back when it would leave the viewport.
    if (labelTop < 0) labelTop = top + 2;
    if (labelLeft < 0) labelLeft = left + 2;
    if (labelLeft + LABEL_WIDTH > window.innerWidth) {
        labelLeft = left + rect.width - LABEL_WIDTH - 2;
    }

    const label = document.createElement('div');
    label.className = 'playwright-highlight-label';
    Object.assign(label.style, {
        position: 'absolute',
        background: baseColor,
        color: 'white',
        padding: '1px 4px',
        borderRadius: '4px',
        // Font scales with the element height, clamped to 8..12 px.
        fontSize: `${Math.min(12, Math.max(8, rect.height / 2))}px`,
        top: `${labelTop}px`,
        left: `${labelLeft}px`,
    });
    label.textContent = index;

    container.appendChild(overlay);
    container.appendChild(label);

    // Tag the element so its highlight can be located later.
    element.setAttribute('browser-user-highlight-id', `playwright-highlight-${index}`);

    return index + 1;
}
|
102
|
+
|
103
|
+
|
104
|
+
// Helper function to generate XPath as a tree
|
105
|
+
/**
 * Build a relative XPath-like path (`tag/tag[2]/...`, no leading slash) for
 * `element` by walking up through its element ancestors.
 *
 * A segment gets a 1-based `[n]` suffix only when at least one preceding
 * sibling has the same node name; the first such sibling carries no suffix.
 *
 * @param {Element} element - Element to describe.
 * @param {boolean} stopAtBoundary - When true, stop climbing as soon as the
 *     current node's parent is a ShadowRoot or an HTMLIFrameElement; that
 *     node itself is not included in the path.
 * @returns {string} Path segments joined with '/'.
 */
function getXPathTree(element, stopAtBoundary = true) {
    // Segments are gathered leaf-first and flipped once at the end.
    const leafFirst = [];

    for (
        let current = element;
        current && current.nodeType === Node.ELEMENT_NODE;
        current = current.parentNode
    ) {
        const parent = current.parentNode;
        if (stopAtBoundary && (parent instanceof ShadowRoot || parent instanceof HTMLIFrameElement)) {
            break;
        }

        // Count earlier element siblings that share this node's name.
        let sameTagBefore = 0;
        for (let sib = current.previousSibling; sib; sib = sib.previousSibling) {
            if (sib.nodeType === Node.ELEMENT_NODE && sib.nodeName === current.nodeName) {
                sameTagBefore += 1;
            }
        }

        const tag = current.nodeName.toLowerCase();
        leafFirst.push(sameTagBefore > 0 ? `${tag}[${sameTagBefore + 1}]` : tag);
    }

    return leafFirst.reverse().join('/');
}
|
134
|
+
|
135
|
+
// Helper function to check if element is accepted
|
136
|
+
/**
 * Tell whether `element` should be included in the DOM tree.
 *
 * @param {Element} element - Element to check; only `tagName` is read.
 * @returns {boolean} False for svg, script, style, link and meta tags
 *     (case-insensitive), true for everything else.
 */
function isElementAccepted(element) {
    const deniedTags = ['svg', 'script', 'style', 'link', 'meta'];
    const tag = element.tagName.toLowerCase();
    return !deniedTags.includes(tag);
}
|
140
|
+
|
141
|
+
// Helper function to check if element is interactive
|
142
|
+
/**
 * Heuristically decide whether `element` is something a user can interact with.
 *
 * An element is interactive when any of the following holds:
 *   - its tag is a natively interactive one (a, button, input, ...);
 *   - its `role` / `aria-role` attribute is in the interactive role list;
 *   - it has a `tabindex` attribute other than "-1";
 *   - its `data-action` is `a-dropdown-select` or `a-dropdown-button`;
 *   - it has state-like ARIA attributes (expanded/pressed/selected/checked);
 *   - it has a click handler (property, inline attribute, or the
 *     `ng-click` / `@click` / `v-on:click` framework attributes);
 *   - it has click/mouse/touch listeners;
 *   - it is draggable.
 *
 * Cursor-style and form-related heuristics were already disabled in the
 * return expression; their unused computations (including a
 * `getComputedStyle` call per element) have been removed.
 *
 * @param {Element} element - Element to classify.
 * @returns {boolean} True when the element looks interactive.
 */
function isInteractiveElement(element) {
    // Natively interactive tags.
    const interactiveElements = new Set([
        'a', 'button', 'details', 'embed', 'input', 'label',
        'menu', 'menuitem', 'object', 'select', 'textarea', 'summary'
    ]);

    // Roles treated as interactive (standard ARIA roles plus site-specific values).
    const interactiveRoles = new Set([
        'button', 'menu', 'menuitem', 'link', 'checkbox', 'radio',
        'slider', 'tab', 'tabpanel', 'textbox', 'combobox', 'grid',
        'listbox', 'option', 'progressbar', 'scrollbar', 'searchbox',
        'switch', 'tree', 'treeitem', 'spinbutton', 'tooltip', 'a-button-inner', 'a-dropdown-button', 'click',
        'menuitemcheckbox', 'menuitemradio', 'a-button-text', 'button-text', 'button-icon', 'button-icon-only', 'button-text-icon-only', 'dropdown'
    ]);

    const tagName = element.tagName.toLowerCase();
    const role = element.getAttribute('role');
    const ariaRole = element.getAttribute('aria-role');
    const tabIndex = element.getAttribute('tabindex');
    const dataAction = element.getAttribute('data-action');

    // Cheap tag/role/attribute checks first.
    const hasInteractiveRole = interactiveElements.has(tagName) ||
        interactiveRoles.has(role) ||
        interactiveRoles.has(ariaRole) ||
        (tabIndex !== null && tabIndex !== '-1') ||
        dataAction === 'a-dropdown-select' ||
        dataAction === 'a-dropdown-button';

    if (hasInteractiveRole) return true;

    // Click handlers declared as a property, inline attribute or framework directive.
    const hasClickHandler = element.onclick !== null ||
        element.getAttribute('onclick') !== null ||
        element.hasAttribute('ng-click') ||
        element.hasAttribute('@click') ||
        element.hasAttribute('v-on:click');

    // Collect listeners for `el`. Uses `window.getEventListeners` when the
    // page exposes such a function; otherwise (or if it throws) falls back to
    // inspecting the element's `on<event>` handler properties.
    function getEventListeners(el) {
        try {
            if (typeof window.getEventListeners === 'function') {
                return window.getEventListeners(el) || {};
            }
        } catch (e) {
            // Fall through to the property-based fallback below.
        }

        // The fallback must also run when the API is simply absent. The
        // previous `window.getEventListeners?.(el) || {}` returned `{}` in
        // that case and never reached this code.
        const listeners = {};
        const eventTypes = [
            'click', 'mousedown', 'mouseup',
            'touchstart', 'touchend',
            'keydown', 'keyup', 'focus', 'blur'
        ];

        for (const type of eventTypes) {
            const handler = el[`on${type}`];
            if (handler) {
                listeners[type] = [{
                    listener: handler,
                    useCapture: false
                }];
            }
        }

        return listeners;
    }

    // Only click/mouse/touch listeners count; keyboard and focus ones do not.
    const listeners = getEventListeners(element);
    const hasClickListeners = listeners && (
        listeners.click?.length > 0 ||
        listeners.mousedown?.length > 0 ||
        listeners.mouseup?.length > 0 ||
        listeners.touchstart?.length > 0 ||
        listeners.touchend?.length > 0
    );

    // ARIA state attributes that suggest a widget.
    const hasAriaProps = element.hasAttribute('aria-expanded') ||
        element.hasAttribute('aria-pressed') ||
        element.hasAttribute('aria-selected') ||
        element.hasAttribute('aria-checked');

    const isDraggable = element.draggable ||
        element.getAttribute('draggable') === 'true';

    return hasAriaProps ||
        hasClickHandler ||
        hasClickListeners ||
        isDraggable;
}
|
250
|
+
|
251
|
+
// Helper function to check if element is visible
|
252
|
+
/**
 * Tell whether `element` occupies layout space and is not CSS-hidden.
 *
 * @param {Element} element - Element to check.
 * @returns {boolean} True when both offset dimensions are positive and the
 *     computed `visibility` is not 'hidden' and `display` is not 'none'.
 */
function isElementVisible(element) {
    const { visibility, display } = window.getComputedStyle(element);

    // Written as !(x > 0) so that a missing offset dimension also counts as "no size".
    const hasSize = element.offsetWidth > 0 && element.offsetHeight > 0;
    if (!hasSize) {
        return false;
    }

    return visibility !== 'hidden' && display !== 'none';
}
|
259
|
+
|
260
|
+
// Helper function to check if element is the top element at its position
|
261
|
+
/**
 * Tell whether `element` is the topmost thing rendered at its own centre point.
 *
 * The centre of the element's client rect is hit-tested with
 * `elementFromPoint`; the element counts as "top" when the hit element is the
 * element itself or one of its descendants.
 *
 * - Elements owned by another document (i.e. inside an iframe) are always
 *   reported as top.
 * - Elements inside a shadow tree are hit-tested through their shadow root.
 * - If hit-testing throws, the element is reported as top.
 *
 * @param {Element} element - Element to test.
 * @returns {boolean} True when the element is considered topmost.
 */
function isTopElement(element) {
    // Iframe content: no hit-testing, treated as top by default.
    if (element.ownerDocument !== window.document) {
        return true;
    }

    // Walk up from `start` via parentElement; true if `element` is met before `boundary`.
    const climbsToElement = (start, boundary) => {
        for (let cur = start; cur && cur !== boundary; cur = cur.parentElement) {
            if (cur === element) return true;
        }
        return false;
    };

    const root = element.getRootNode();
    const inShadowTree = root instanceof ShadowRoot;

    const box = element.getBoundingClientRect();
    const centerX = box.left + box.width / 2;
    const centerY = box.top + box.height / 2;

    try {
        // Shadow content is hit-tested in its own root context.
        const hit = inShadowTree
            ? root.elementFromPoint(centerX, centerY)
            : document.elementFromPoint(centerX, centerY);
        if (!hit) return false;

        return climbsToElement(hit, inShadowTree ? root : document.documentElement);
    } catch (e) {
        return true; // If we can't determine, consider it visible
    }
}
|
311
|
+
|
312
|
+
// Helper function to check if text node is visible
|
313
|
+
/**
 * Tell whether a text node is rendered and starts inside the viewport.
 *
 * The node's contents are measured through a Range. The node is visible when
 * the measured rect has non-zero width and height, its top edge lies within
 * `[0, window.innerHeight]`, and the parent element passes
 * `checkVisibility` with opacity and visibility-CSS checks enabled.
 *
 * @param {Text} textNode - Text node to test.
 * @returns {boolean|undefined} False when a geometry check fails; otherwise
 *     the result of the parent's `checkVisibility` call (undefined when the
 *     node has no parent element).
 */
function isTextNodeVisible(textNode) {
    const range = document.createRange();
    range.selectNodeContents(textNode);
    const { width, height, top } = range.getBoundingClientRect();

    const hasArea = width !== 0 && height !== 0;
    if (!hasArea) return false;

    const startsInViewport = top >= 0 && top <= window.innerHeight;
    if (!startsInViewport) return false;

    return textNode.parentElement?.checkVisibility({
        checkOpacity: true,
        checkVisibilityCSS: true
    });
}
|
327
|
+
|
328
|
+
|
329
|
+
// Function to traverse the DOM and create nested JSON
|
330
|
+
/**
 * Recursively convert a DOM subtree into a plain nested object.
 *
 * Text nodes become `{type: "TEXT_NODE", text, isVisible: true}` when their
 * trimmed text is non-empty and visible, otherwise null. Elements rejected by
 * `isElementAccepted` become null. Every other node becomes
 * `{tagName, attributes, xpath, children, ...}`; elements additionally get
 * `isInteractive`, `isVisible` and `isTopElement`, plus a `highlightIndex`
 * when all three are true (indices are handed out in pre-order from the
 * enclosing `highlightIndex` counter). `shadowRoot: true` is set when the
 * node hosts a shadow root.
 *
 * The `children` array may contain null entries for skipped child nodes.
 * Shadow-root children come before regular children. For iframes the
 * children are the nodes of the frame document's body; if the frame cannot
 * be accessed a warning is logged and no children are added.
 *
 * @param {Node} node - Node to convert.
 * @param {?Element} parentIframe - Enclosing iframe element, passed on to
 *     `highlightElement` for overlay positioning.
 * @returns {?Object} The node description, or null when the node is skipped.
 */
function buildDomTree(node, parentIframe = null) {
    if (!node) return null;

    // Text nodes: keep only non-blank, visible text.
    if (node.nodeType === Node.TEXT_NODE) {
        const text = node.textContent.trim();
        if (!text || !isTextNodeVisible(node)) {
            return null;
        }
        return {
            type: "TEXT_NODE",
            text: text,
            isVisible: true,
        };
    }

    const isElement = node.nodeType === Node.ELEMENT_NODE;
    if (isElement && !isElementAccepted(node)) {
        return null;
    }

    // Convert every node of a child list, keeping nulls for skipped nodes.
    const convertAll = (nodes, frame) =>
        Array.from(nodes).map(child => buildDomTree(child, frame));

    const nodeData = {
        tagName: node.tagName ? node.tagName.toLowerCase() : null,
        attributes: {},
        xpath: isElement ? getXPathTree(node, true) : null,
        children: [],
    };

    if (isElement) {
        // Copy attributes by name rather than iterating `node.attributes` directly.
        if (node.attributes) {
            for (const attrName of node.getAttributeNames?.() || []) {
                nodeData.attributes[attrName] = node.getAttribute(attrName);
            }
        }

        const isInteractive = isInteractiveElement(node);
        const isVisible = isElementVisible(node);
        const isTop = isTopElement(node);

        nodeData.isInteractive = isInteractive;
        nodeData.isVisible = isVisible;
        nodeData.isTopElement = isTop;

        // An index is assigned whenever all criteria hold; drawing the
        // highlight is optional.
        if (isInteractive && isVisible && isTop) {
            nodeData.highlightIndex = highlightIndex++;
            if (doHighlightElements) {
                highlightElement(node, nodeData.highlightIndex, parentIframe);
            }
        }
    }

    // Shadow DOM: flag the host and descend into the shadow tree first.
    if (node.shadowRoot) {
        nodeData.shadowRoot = true;
        nodeData.children.push(...convertAll(node.shadowRoot.childNodes, parentIframe));
    }

    if (node.tagName === 'IFRAME') {
        // Descend into the frame's document; descendants get this iframe as their parentIframe.
        try {
            const frameDoc = node.contentDocument || node.contentWindow.document;
            if (frameDoc) {
                nodeData.children.push(...convertAll(frameDoc.body.childNodes, node));
            }
        } catch (e) {
            console.warn('Unable to access iframe:', node);
        }
    } else {
        nodeData.children.push(...convertAll(node.childNodes, parentIframe));
    }

    return nodeData;
}
|
425
|
+
|
426
|
+
|
427
|
+
return buildDomTree(document.body);
|
428
|
+
}
|
@@ -0,0 +1,112 @@
|
|
1
|
+
import hashlib
|
2
|
+
from dataclasses import dataclass
|
3
|
+
from typing import Optional
|
4
|
+
|
5
|
+
from lumivor.dom.history_tree_processor.view import DOMHistoryElement, HashedDomElement
|
6
|
+
from lumivor.dom.views import DOMElementNode
|
7
|
+
|
8
|
+
|
9
|
+
class HistoryTreeProcessor:
    """
    Operations for fingerprinting DOM elements and matching them across trees.

    An element's fingerprint is a pair of SHA-256 hex digests: one over the
    tag names along its branch of the tree and one over its attributes.

    @dev be careful - text nodes can change even if elements stay the same
    (text is not part of the fingerprint).
    """

    @staticmethod
    def convert_dom_element_to_history_element(dom_element: DOMElementNode) -> DOMHistoryElement:
        """Snapshot a live DOM element into a DOMHistoryElement record."""
        return DOMHistoryElement(
            tag_name=dom_element.tag_name,
            xpath=dom_element.xpath,
            highlight_index=dom_element.highlight_index,
            entire_parent_branch_path=HistoryTreeProcessor._get_parent_branch_path(dom_element),
            attributes=dom_element.attributes,
            shadow_root=dom_element.shadow_root,
        )

    @staticmethod
    def find_history_element_in_tree(
        dom_history_element: DOMHistoryElement, tree: DOMElementNode
    ) -> Optional[DOMElementNode]:
        """
        Return the first highlighted node in `tree` whose fingerprint equals
        that of `dom_history_element`, or None when there is no such node.

        Nodes are visited depth-first in document order (a node before its
        children); only nodes with a highlight index are compared.
        """
        wanted = HistoryTreeProcessor._hash_dom_history_element(dom_history_element)

        # Explicit stack; children are pushed reversed so the leftmost is popped first.
        pending = [tree]
        while pending:
            node = pending.pop()
            if node.highlight_index is not None:
                if HistoryTreeProcessor._hash_dom_element(node) == wanted:
                    return node
            pending.extend(
                child for child in reversed(node.children) if isinstance(child, DOMElementNode)
            )

        return None

    @staticmethod
    def compare_history_element_and_dom_element(
        dom_history_element: DOMHistoryElement, dom_element: DOMElementNode
    ) -> bool:
        """Return True when both elements produce the same fingerprint."""
        history_hash = HistoryTreeProcessor._hash_dom_history_element(dom_history_element)
        element_hash = HistoryTreeProcessor._hash_dom_element(dom_element)
        return history_hash == element_hash

    @staticmethod
    def _hash_dom_history_element(dom_history_element: DOMHistoryElement) -> HashedDomElement:
        """Fingerprint a recorded element from its stored branch path and attributes."""
        return HashedDomElement(
            branch_path_hash=HistoryTreeProcessor._parent_branch_path_hash(
                dom_history_element.entire_parent_branch_path
            ),
            attributes_hash=HistoryTreeProcessor._attributes_hash(dom_history_element.attributes),
        )

    @staticmethod
    def _hash_dom_element(dom_element: DOMElementNode) -> HashedDomElement:
        """Fingerprint a live element; its branch path is computed from the tree."""
        branch_path = HistoryTreeProcessor._get_parent_branch_path(dom_element)
        return HashedDomElement(
            branch_path_hash=HistoryTreeProcessor._parent_branch_path_hash(branch_path),
            attributes_hash=HistoryTreeProcessor._attributes_hash(dom_element.attributes),
        )

    @staticmethod
    def _get_parent_branch_path(dom_element: DOMElementNode) -> list[str]:
        """
        Return the tag names from the top of the tree down to `dom_element`.

        The element itself is the last entry; the root node (the one whose
        parent is None) is not included.
        """
        tags_leaf_first: list[str] = []
        node: DOMElementNode = dom_element
        while node.parent is not None:
            tags_leaf_first.append(node.tag_name)
            node = node.parent

        return tags_leaf_first[::-1]

    @staticmethod
    def _parent_branch_path_hash(parent_branch_path: list[str]) -> str:
        """Return the SHA-256 hex digest of the path entries joined with '/'."""
        joined = '/'.join(parent_branch_path)
        digest = hashlib.sha256()
        digest.update(joined.encode())
        return digest.hexdigest()

    @staticmethod
    def _attributes_hash(attributes: dict[str, str]) -> str:
        """
        Return the SHA-256 hex digest of the concatenated `key=value` pairs.

        Pairs are taken in the dict's iteration order with no separator
        between them.
        """
        pairs = [f'{key}={value}' for key, value in attributes.items()]
        digest = hashlib.sha256()
        digest.update(''.join(pairs).encode())
        return digest.hexdigest()

    @staticmethod
    def _text_hash(dom_element: DOMElementNode) -> str:
        """Return the SHA-256 hex digest of the element's text up to the next clickable element."""
        text = dom_element.get_all_text_till_next_clickable_element()
        digest = hashlib.sha256()
        digest.update(text.encode())
        return digest.hexdigest()
|
@@ -0,0 +1,33 @@
|
|
1
|
+
from dataclasses import dataclass
|
2
|
+
from typing import Optional
|
3
|
+
|
4
|
+
|
5
|
+
@dataclass
class HashedDomElement:
    """
    Fingerprint of a DOM element, used as its unique identifier.

    Two elements are treated as the same when both hash fields are equal.
    """

    # Hash string derived from the element's branch path.
    branch_path_hash: str
    # Hash string derived from the element's attributes.
    attributes_hash: str
    # NOTE: a `text_hash: str` field is currently disabled and not part of the fingerprint.
|
14
|
+
|
15
|
+
|
16
|
+
@dataclass
class DOMHistoryElement:
    """Recorded description of a DOM element, detached from any live tree."""

    tag_name: str
    xpath: str
    highlight_index: Optional[int]
    # Tag names along the element's branch of the tree.
    entire_parent_branch_path: list[str]
    attributes: dict[str, str]
    shadow_root: bool = False

    def to_dict(self) -> dict:
        """
        Return the record as a plain dict keyed by field name.

        Values are the attribute objects themselves (no copies are made).
        """
        field_names = (
            'tag_name',
            'xpath',
            'highlight_index',
            'entire_parent_branch_path',
            'attributes',
            'shadow_root',
        )
        return {name: getattr(self, name) for name in field_names}
|
lumivor/dom/service.py
ADDED
@@ -0,0 +1,100 @@
|
|
1
|
+
import logging
|
2
|
+
from importlib import resources
|
3
|
+
from typing import Optional
|
4
|
+
|
5
|
+
from playwright.async_api import Page
|
6
|
+
|
7
|
+
from lumivor.dom.views import (
|
8
|
+
DOMBaseNode,
|
9
|
+
DOMElementNode,
|
10
|
+
DOMState,
|
11
|
+
DOMTextNode,
|
12
|
+
SelectorMap,
|
13
|
+
)
|
14
|
+
|
15
|
+
logger = logging.getLogger(__name__)
|
16
|
+
|
17
|
+
|
18
|
+
class DomService:
    """
    Extracts a tree of DOM nodes from a Playwright page.

    The page-side work is done by the bundled `buildDomTree.js` script; this
    class runs it and converts its nested-dict result into DOM node objects.
    """

    def __init__(self, page: Page):
        """Bind the service to `page`."""
        self.page = page
        # Initialised empty; not read or written by the methods in this class.
        self.xpath_cache = {}

    # region - Clickable elements
    async def get_clickable_elements(self, highlight_elements: bool = True) -> DOMState:
        """
        Build the element tree for the current page together with a map from
        highlight index to element.

        Args:
            highlight_elements: Whether the page script should draw highlight
                overlays on the elements it indexes.
        """
        element_tree = await self._build_dom_tree(highlight_elements)
        selector_map = self._create_selector_map(element_tree)

        return DOMState(element_tree=element_tree, selector_map=selector_map)

    async def _build_dom_tree(self, highlight_elements: bool) -> DOMElementNode:
        """
        Run `buildDomTree.js` in the page and parse its result.

        Raises:
            ValueError: If the script result does not parse into a DOMElementNode.
        """
        js_code = resources.read_text('lumivor.dom', 'buildDomTree.js')

        # Pass the flag as a bare boolean. Playwright hands `arg` to the
        # script's single parameter as-is, so wrapping it in a list made the
        # script receive `[false]`, which is truthy in JavaScript, and
        # highlighting could never be switched off.
        eval_page = await self.page.evaluate(
            js_code, highlight_elements
        )  # This is quite big, so be careful
        html_to_dict = self._parse_node(eval_page)

        if html_to_dict is None or not isinstance(html_to_dict, DOMElementNode):
            raise ValueError('Failed to parse HTML to dictionary')

        return html_to_dict

    def _create_selector_map(self, element_tree: DOMElementNode) -> SelectorMap:
        """Map every highlight index found in `element_tree` to its element node."""
        selector_map = {}

        def process_node(node: DOMBaseNode):
            # Only element nodes carry a highlight index or children.
            if isinstance(node, DOMElementNode):
                if node.highlight_index is not None:
                    selector_map[node.highlight_index] = node

                for child in node.children:
                    process_node(child)

        process_node(element_tree)
        return selector_map

    def _parse_node(
        self,
        node_data: dict,
        parent: Optional[DOMElementNode] = None,
    ) -> Optional[DOMBaseNode]:
        """
        Convert one node dict produced by the page script into a DOM node.

        Args:
            node_data: Node description; a dict with `type == 'TEXT_NODE'`
                becomes a DOMTextNode, anything else a DOMElementNode.
            parent: Element node the new node is attached to, if any.

        Returns:
            The parsed node, or None when `node_data` is empty or None.
        """
        if not node_data:
            return None

        if node_data.get('type') == 'TEXT_NODE':
            text_node = DOMTextNode(
                text=node_data['text'],
                is_visible=node_data['isVisible'],
                parent=parent,
            )

            return text_node

        tag_name = node_data['tagName']

        element_node = DOMElementNode(
            tag_name=tag_name,
            xpath=node_data['xpath'],
            attributes=node_data.get('attributes', {}),
            children=[],  # Initialize empty, will fill later
            is_visible=node_data.get('isVisible', False),
            is_interactive=node_data.get('isInteractive', False),
            is_top_element=node_data.get('isTopElement', False),
            highlight_index=node_data.get('highlightIndex'),
            shadow_root=node_data.get('shadowRoot', False),
            parent=parent,
        )

        # Children are parsed after the element exists so they can point back
        # to it; null entries from the script are skipped.
        children: list[DOMBaseNode] = []
        for child in node_data.get('children', []):
            if child is not None:
                child_node = self._parse_node(child, parent=element_node)
                if child_node is not None:
                    children.append(child_node)

        element_node.children = children

        return element_node

    # endregion
|