lumivor 0.1.7__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- lumivor/README.md +51 -0
- lumivor/__init__.py +25 -0
- lumivor/agent/message_manager/service.py +252 -0
- lumivor/agent/message_manager/tests.py +246 -0
- lumivor/agent/message_manager/views.py +37 -0
- lumivor/agent/prompts.py +208 -0
- lumivor/agent/service.py +1017 -0
- lumivor/agent/tests.py +204 -0
- lumivor/agent/views.py +272 -0
- lumivor/browser/browser.py +208 -0
- lumivor/browser/context.py +993 -0
- lumivor/browser/tests/screenshot_test.py +38 -0
- lumivor/browser/tests/test_clicks.py +77 -0
- lumivor/browser/views.py +48 -0
- lumivor/controller/registry/service.py +140 -0
- lumivor/controller/registry/views.py +71 -0
- lumivor/controller/service.py +557 -0
- lumivor/controller/views.py +47 -0
- lumivor/dom/__init__.py +0 -0
- lumivor/dom/buildDomTree.js +428 -0
- lumivor/dom/history_tree_processor/service.py +112 -0
- lumivor/dom/history_tree_processor/view.py +33 -0
- lumivor/dom/service.py +100 -0
- lumivor/dom/tests/extraction_test.py +44 -0
- lumivor/dom/tests/process_dom_test.py +40 -0
- lumivor/dom/views.py +187 -0
- lumivor/logging_config.py +128 -0
- lumivor/telemetry/service.py +114 -0
- lumivor/telemetry/views.py +51 -0
- lumivor/utils.py +54 -0
- lumivor-0.1.7.dist-info/METADATA +100 -0
- lumivor-0.1.7.dist-info/RECORD +34 -0
- lumivor-0.1.7.dist-info/WHEEL +4 -0
- lumivor-0.1.7.dist-info/licenses/LICENSE +21 -0
@@ -0,0 +1,428 @@
|
|
1
|
+
(
|
2
|
+
doHighlightElements = true
|
3
|
+
) => {
|
4
|
+
let highlightIndex = 0; // Reset highlight index
|
5
|
+
|
6
|
+
/**
 * Draw a coloured outline and a numeric label over `element`.
 *
 * Overlays are appended to a single fixed-position container
 * (`#playwright-highlight-container`) that is created on first use and
 * attached to the document element. Coordinates come from
 * `getBoundingClientRect()`; when `parentIframe` is given, the iframe's own
 * bounding-rect origin is added to them.
 *
 * @param {Element} element - Element to outline.
 * @param {number} index - Number shown in the label; also selects the colour.
 * @param {?Element} parentIframe - Iframe element containing `element`, if any.
 * @returns {number} `index + 1`.
 */
function highlightElement(element, index, parentIframe = null) {
    const CONTAINER_ID = 'playwright-highlight-container';
    const PALETTE = [
        '#FF0000', '#00FF00', '#0000FF', '#FFA500',
        '#800080', '#008080', '#FF69B4', '#4B0082',
        '#FF4500', '#2E8B57', '#DC143C', '#4682B4'
    ];
    // Approximate label dimensions used only for placement.
    const LABEL_WIDTH = 20;
    const LABEL_HEIGHT = 16;

    // Reuse the shared overlay container, creating it on first call.
    let container = document.getElementById(CONTAINER_ID);
    if (!container) {
        container = document.createElement('div');
        container.id = CONTAINER_ID;
        Object.assign(container.style, {
            position: 'fixed',
            pointerEvents: 'none',
            top: '0',
            left: '0',
            width: '100%',
            height: '100%',
            zIndex: '2147483647' // Maximum z-index value
        });
        document.documentElement.appendChild(container);
    }

    // The colour cycles through the palette by index.
    const accent = PALETTE[index % PALETTE.length];

    // Element position, shifted by the iframe origin when applicable.
    const bounds = element.getBoundingClientRect();
    let top = bounds.top;
    let left = bounds.left;
    if (parentIframe) {
        const frameBounds = parentIframe.getBoundingClientRect();
        top += frameBounds.top;
        left += frameBounds.left;
    }

    const overlay = document.createElement('div');
    Object.assign(overlay.style, {
        position: 'absolute',
        border: `2px solid ${accent}`,
        backgroundColor: `${accent}1A`, // 10% opacity version of the color
        pointerEvents: 'none',
        boxSizing: 'border-box',
        top: `${top}px`,
        left: `${left}px`,
        width: `${bounds.width}px`,
        height: `${bounds.height}px`
    });

    const label = document.createElement('div');
    label.className = 'playwright-highlight-label';
    Object.assign(label.style, {
        position: 'absolute',
        background: accent,
        color: 'white',
        padding: '1px 4px',
        borderRadius: '4px',
        // Font size follows the element height, clamped to 8..12 px.
        fontSize: `${Math.min(12, Math.max(8, bounds.height / 2))}px`
    });
    label.textContent = index;

    // Inside the top-right corner by default; above the box when the box is
    // too small to contain the label.
    const boxTooSmall = bounds.width < LABEL_WIDTH + 4 || bounds.height < LABEL_HEIGHT + 4;
    let labelTop = boxTooSmall ? top - LABEL_HEIGHT - 2 : top + 2;
    let labelLeft = boxTooSmall
        ? left + bounds.width - LABEL_WIDTH
        : left + bounds.width - LABEL_WIDTH - 2;

    // Pull the label back when it would leave the viewport.
    if (labelTop < 0) labelTop = top + 2;
    if (labelLeft < 0) labelLeft = left + 2;
    if (labelLeft + LABEL_WIDTH > window.innerWidth) {
        labelLeft = left + bounds.width - LABEL_WIDTH - 2;
    }

    label.style.top = `${labelTop}px`;
    label.style.left = `${labelLeft}px`;

    container.appendChild(overlay);
    container.appendChild(label);

    // Tag the element so the highlight can be traced back to it.
    element.setAttribute('browser-user-highlight-id', `playwright-highlight-${index}`);

    return index + 1;
}
|
102
|
+
|
103
|
+
|
104
|
+
/**
 * Build a slash-separated path of tag names from the outermost ancestor
 * element down to `element`.
 *
 * A segment gets a 1-based `[n]` suffix only when the node has at least one
 * preceding sibling element with the same node name. The walk ends at the
 * first non-element ancestor, or earlier (without emitting a segment for the
 * current node) when `stopAtBoundary` is set and the node's parent is a
 * ShadowRoot or an HTMLIFrameElement.
 *
 * @param {Element} element - Starting element.
 * @param {boolean} stopAtBoundary - Stop at shadow-root / iframe boundaries.
 * @returns {string} Path such as `html/body/div[2]`.
 */
function getXPathTree(element, stopAtBoundary = true) {
    const parts = [];

    for (
        let node = element;
        node && node.nodeType === Node.ELEMENT_NODE;
        node = node.parentNode
    ) {
        const parent = node.parentNode;
        if (stopAtBoundary && (parent instanceof ShadowRoot || parent instanceof HTMLIFrameElement)) {
            break;
        }

        // Count earlier siblings that share this node's name.
        let sameNameBefore = 0;
        for (let prev = node.previousSibling; prev; prev = prev.previousSibling) {
            if (prev.nodeType === Node.ELEMENT_NODE && prev.nodeName === node.nodeName) {
                sameNameBefore += 1;
            }
        }

        const tag = node.nodeName.toLowerCase();
        const position = sameNameBefore > 0 ? `[${sameNameBefore + 1}]` : '';
        parts.unshift(`${tag}${position}`);
    }

    return parts.join('/');
}
|
134
|
+
|
135
|
+
/**
 * Tell whether an element should be included in the DOM tree.
 *
 * @param {Element} element - Element to test.
 * @returns {boolean} false for svg, script, style, link and meta tags
 *     (compared case-insensitively); true for everything else.
 */
function isElementAccepted(element) {
    const deniedTags = ['svg', 'script', 'style', 'link', 'meta'];
    const tag = element.tagName.toLowerCase();
    return !deniedTags.includes(tag);
}
|
140
|
+
|
141
|
+
/**
 * Heuristically decide whether an element can be interacted with.
 *
 * An element is interactive when any of the following holds:
 *   - its tag is an inherently interactive one (a, button, input, ...);
 *   - its `role` / `aria-role` attribute is in the interactive role list;
 *   - it has a `tabindex` attribute other than "-1";
 *   - its `data-action` is `a-dropdown-select` or `a-dropdown-button`;
 *   - it has aria-expanded / aria-pressed / aria-selected / aria-checked;
 *   - it carries a click handler (property, `onclick` attribute, or the
 *     `ng-click`, `@click`, `v-on:click` framework attributes);
 *   - it has click/mouse/touch listeners;
 *   - it is draggable.
 *
 * NOTE: the pointer-cursor styling and form-relatedness heuristics are
 * intentionally not part of the result (they were disabled in the original
 * implementation), so no computed style is requested here.
 *
 * @param {Element} element - Element to classify.
 * @returns {boolean} true when the element looks interactive.
 */
function isInteractiveElement(element) {
    // Base interactive elements and roles
    const interactiveElements = new Set([
        'a', 'button', 'details', 'embed', 'input', 'label',
        'menu', 'menuitem', 'object', 'select', 'textarea', 'summary'
    ]);

    const interactiveRoles = new Set([
        'button', 'menu', 'menuitem', 'link', 'checkbox', 'radio',
        'slider', 'tab', 'tabpanel', 'textbox', 'combobox', 'grid',
        'listbox', 'option', 'progressbar', 'scrollbar', 'searchbox',
        'switch', 'tree', 'treeitem', 'spinbutton', 'tooltip', 'a-button-inner', 'a-dropdown-button', 'click',
        'menuitemcheckbox', 'menuitemradio', 'a-button-text', 'button-text', 'button-icon', 'button-icon-only', 'button-text-icon-only', 'dropdown'
    ]);

    // Pointer-style events whose presence marks an element as clickable.
    const pointerEventTypes = ['click', 'mousedown', 'mouseup', 'touchstart', 'touchend'];

    const tagName = element.tagName.toLowerCase();
    const role = element.getAttribute('role');
    const ariaRole = element.getAttribute('aria-role');
    const tabIndex = element.getAttribute('tabindex');
    const dataAction = element.getAttribute('data-action');

    // Basic role/attribute checks
    const hasInteractiveRole = interactiveElements.has(tagName) ||
        interactiveRoles.has(role) ||
        interactiveRoles.has(ariaRole) ||
        (tabIndex !== null && tabIndex !== '-1') ||
        dataAction === 'a-dropdown-select' ||
        dataAction === 'a-dropdown-button';

    if (hasInteractiveRole) return true;

    // `!= null` (not `!== null`) so that an element lacking the `onclick`
    // property altogether (value `undefined`) is not treated as having one.
    const hasClickHandler = element.onclick != null ||
        element.getAttribute('onclick') !== null ||
        element.hasAttribute('ng-click') ||
        element.hasAttribute('@click') ||
        element.hasAttribute('v-on:click');

    // Collect listeners for `el`, keyed by event type.
    // `window.getEventListeners` exists only as a DevTools console utility,
    // so on an ordinary page it is undefined. In that case (or if the call
    // throws) fall back to inspecting the `on<event>` handler properties;
    // previously the fallback lived in a `catch` that was never reached
    // because calling a missing API through `?.()` does not throw.
    function getEventListeners(el) {
        if (typeof window.getEventListeners === 'function') {
            try {
                return window.getEventListeners(el) || {};
            } catch (e) {
                // Fall through to the property-based fallback below.
            }
        }

        const listeners = {};
        for (const type of pointerEventTypes) {
            const handler = el[`on${type}`];
            if (handler) {
                listeners[type] = [{
                    listener: handler,
                    useCapture: false
                }];
            }
        }
        return listeners;
    }

    // Check for click-related events on the element itself
    const listeners = getEventListeners(element);
    const hasClickListeners = pointerEventTypes.some(
        type => listeners[type]?.length > 0
    );

    // Check for ARIA properties that suggest interactivity
    const hasAriaProps = element.hasAttribute('aria-expanded') ||
        element.hasAttribute('aria-pressed') ||
        element.hasAttribute('aria-selected') ||
        element.hasAttribute('aria-checked');

    // Check if element is draggable
    const isDraggable = element.draggable ||
        element.getAttribute('draggable') === 'true';

    return Boolean(
        hasAriaProps ||
        hasClickHandler ||
        hasClickListeners ||
        isDraggable
    );
}
|
250
|
+
|
251
|
+
/**
 * Tell whether an element is rendered with a non-empty box.
 *
 * @param {Element} element - Element to test.
 * @returns {boolean} true when both offsetWidth and offsetHeight are
 *     positive and the computed style is neither `visibility: hidden` nor
 *     `display: none`.
 */
function isElementVisible(element) {
    const { visibility, display } = window.getComputedStyle(element);
    const hasBox = element.offsetWidth > 0 && element.offsetHeight > 0;
    return hasBox && visibility !== 'hidden' && display !== 'none';
}
|
259
|
+
|
260
|
+
/**
 * Tell whether `element` is the top-most element at the centre of its own
 * bounding box, i.e. whether a hit test at that point lands on the element
 * or on one of its descendants.
 *
 * - Elements owned by a different document than `window.document` (iframe
 *   content) are always reported as top.
 * - Elements inside a shadow tree are hit-tested through their ShadowRoot;
 *   all others through `document`.
 * - If the hit test throws, the element is reported as top.
 *
 * @param {Element} element - Element to test.
 * @returns {boolean} true when the element is (or contains) the hit target.
 */
function isTopElement(element) {
    // Iframe content cannot be hit-tested from here; treat it as top.
    if (element.ownerDocument !== window.document) {
        return true;
    }

    const root = element.getRootNode();
    const insideShadowTree = root instanceof ShadowRoot;

    const box = element.getBoundingClientRect();
    const centerX = box.left + box.width / 2;
    const centerY = box.top + box.height / 2;

    try {
        // Hit-test in the element's own tree scope.
        const scope = insideShadowTree ? root : document;
        const hit = scope.elementFromPoint(centerX, centerY);
        if (!hit) return false;

        // Climb from the hit target looking for `element`, stopping at the
        // scope boundary (shadow root, or the document element).
        const boundary = insideShadowTree ? root : document.documentElement;
        for (let node = hit; node && node !== boundary; node = node.parentElement) {
            if (node === element) return true;
        }
        return false;
    } catch (e) {
        return true; // If we can't determine, consider it visible
    }
}
|
311
|
+
|
312
|
+
/**
 * Tell whether a text node is visible.
 *
 * The node's rendered rectangle (measured through a Range over its contents)
 * must have non-zero width and height, its top edge must lie between 0 and
 * `window.innerHeight`, and the parent element must pass
 * `checkVisibility({ checkOpacity, checkVisibilityCSS })`.
 *
 * @param {Text} textNode - Text node to test.
 * @returns {boolean|undefined} Result of the checks; `undefined` when the
 *     geometric checks pass but the node has no parent element.
 */
function isTextNodeVisible(textNode) {
    const range = document.createRange();
    range.selectNodeContents(textNode);
    const { width, height, top } = range.getBoundingClientRect();

    const hasArea = width !== 0 && height !== 0;
    const topInsideViewport = top >= 0 && top <= window.innerHeight;

    return hasArea &&
        topInsideViewport &&
        textNode.parentElement?.checkVisibility({
            checkOpacity: true,
            checkVisibilityCSS: true
        });
}
|
327
|
+
|
328
|
+
|
329
|
+
/**
 * Recursively convert a DOM node into a plain, JSON-serializable object.
 *
 * - Text nodes become `{ type: "TEXT_NODE", text, isVisible: true }` when
 *   their trimmed text is non-empty and visible; otherwise they are dropped.
 * - Any other non-element node (comment, processing instruction, ...) is
 *   dropped instead of being emitted as an element with a null tag name.
 * - Elements rejected by `isElementAccepted` are dropped with their subtree.
 * - Accepted elements become
 *   `{ tagName, attributes, xpath, children, isInteractive, isVisible,
 *      isTopElement, [highlightIndex], [shadowRoot] }`.
 *   Elements that are interactive, visible and top-most receive the next
 *   `highlightIndex` and, when `doHighlightElements` is truthy, an overlay.
 * - Children are gathered from the shadow root (if any) first, then from the
 *   iframe document body for IFRAME elements, or from `childNodes` otherwise.
 *   Dropped children are not included in `children`.
 *
 * @param {?Node} node - Node to convert.
 * @param {?Element} parentIframe - Iframe element containing `node`, if any;
 *     forwarded to `highlightElement` for coordinate adjustment.
 * @returns {?Object} Serializable node description, or null when dropped.
 */
function buildDomTree(node, parentIframe = null) {
    if (!node) return null;

    // Special case for text nodes
    if (node.nodeType === Node.TEXT_NODE) {
        const textContent = node.textContent.trim();
        if (textContent && isTextNodeVisible(node)) {
            return {
                type: "TEXT_NODE",
                text: textContent,
                isVisible: true,
            };
        }
        return null;
    }

    // Comments and other non-element nodes carry nothing useful for the
    // consumer; skip them rather than emitting `{ tagName: null, ... }`.
    if (node.nodeType !== Node.ELEMENT_NODE) {
        return null;
    }

    // Check if element is accepted
    if (!isElementAccepted(node)) {
        return null;
    }

    const nodeData = {
        tagName: node.tagName ? node.tagName.toLowerCase() : null,
        attributes: {},
        xpath: getXPathTree(node, true),
        children: [],
    };

    // Copy all attributes by name.
    if (node.attributes) {
        const attributeNames = node.getAttributeNames?.() || [];
        for (const name of attributeNames) {
            nodeData.attributes[name] = node.getAttribute(name);
        }
    }

    const isInteractive = isInteractiveElement(node);
    const isVisible = isElementVisible(node);
    const isTop = isTopElement(node);

    nodeData.isInteractive = isInteractive;
    nodeData.isVisible = isVisible;
    nodeData.isTopElement = isTop;

    // Assign a highlight index when the element meets all criteria; draw the
    // overlay only when highlighting is enabled.
    if (isInteractive && isVisible && isTop) {
        nodeData.highlightIndex = highlightIndex++;
        if (doHighlightElements) {
            highlightElement(node, nodeData.highlightIndex, parentIframe);
        }
    }

    // Convert a list of child nodes and append the surviving results.
    // The list is snapshotted first, and results are appended one by one
    // (no argument spreading) so very large child lists are handled safely.
    const appendChildren = (childNodes, iframeContext) => {
        const built = Array.from(childNodes)
            .map(child => buildDomTree(child, iframeContext))
            .filter(child => child !== null);
        for (const child of built) {
            nodeData.children.push(child);
        }
    };

    // Handle shadow DOM: flag the host and include the shadow tree's nodes.
    if (node.shadowRoot) {
        nodeData.shadowRoot = true;
        appendChildren(node.shadowRoot.childNodes, parentIframe);
    }

    // Handle iframes
    if (node.tagName === 'IFRAME') {
        try {
            // Accessing a cross-origin frame's document throws; that case is
            // reported below and the iframe is emitted without children.
            const iframeDoc = node.contentDocument || node.contentWindow.document;
            if (iframeDoc && iframeDoc.body) {
                appendChildren(iframeDoc.body.childNodes, node);
            }
        } catch (e) {
            console.warn('Unable to access iframe:', node);
        }
    } else {
        appendChildren(node.childNodes, parentIframe);
    }

    return nodeData;
}
|
425
|
+
|
426
|
+
|
427
|
+
return buildDomTree(document.body);
|
428
|
+
}
|
@@ -0,0 +1,112 @@
|
|
1
|
+
import hashlib
|
2
|
+
from dataclasses import dataclass
|
3
|
+
from typing import Optional
|
4
|
+
|
5
|
+
from lumivor.dom.history_tree_processor.view import DOMHistoryElement, HashedDomElement
|
6
|
+
from lumivor.dom.views import DOMElementNode
|
7
|
+
|
8
|
+
|
9
|
+
class HistoryTreeProcessor:
    """Operations for matching recorded DOM elements against a DOM tree.

    An element is identified by two SHA-256 digests: one over its branch path
    (tag names from below the root down to the element) and one over its
    attributes. Text is not part of the identity, because text nodes can
    change even if the elements stay the same.
    """

    @staticmethod
    def convert_dom_element_to_history_element(dom_element: DOMElementNode) -> DOMHistoryElement:
        """Snapshot `dom_element` into a `DOMHistoryElement`.

        The snapshot carries the tag name, xpath, highlight index, branch
        path, attributes and shadow-root flag of the element.
        """
        return DOMHistoryElement(
            dom_element.tag_name,
            dom_element.xpath,
            dom_element.highlight_index,
            HistoryTreeProcessor._get_parent_branch_path(dom_element),
            dom_element.attributes,
            dom_element.shadow_root,
        )

    @staticmethod
    def find_history_element_in_tree(
        dom_history_element: DOMHistoryElement, tree: DOMElementNode
    ) -> Optional[DOMElementNode]:
        """Return the first node of `tree` whose hash equals the history element's.

        Nodes are visited in depth-first pre-order. Only nodes that have a
        highlight index are compared, and only `DOMElementNode` children are
        descended into. Returns None when nothing matches.
        """
        wanted = HistoryTreeProcessor._hash_dom_history_element(dom_history_element)

        # Explicit stack; children are pushed in reverse so that they are
        # popped (visited) in their original order.
        pending = [tree]
        while pending:
            candidate = pending.pop()
            if (
                candidate.highlight_index is not None
                and HistoryTreeProcessor._hash_dom_element(candidate) == wanted
            ):
                return candidate
            pending.extend(
                child
                for child in reversed(candidate.children)
                if isinstance(child, DOMElementNode)
            )

        return None

    @staticmethod
    def compare_history_element_and_dom_element(
        dom_history_element: DOMHistoryElement, dom_element: DOMElementNode
    ) -> bool:
        """Return True when both elements hash to the same `HashedDomElement`."""
        history_hash = HistoryTreeProcessor._hash_dom_history_element(dom_history_element)
        element_hash = HistoryTreeProcessor._hash_dom_element(dom_element)
        return history_hash == element_hash

    @staticmethod
    def _hash_dom_history_element(dom_history_element: DOMHistoryElement) -> HashedDomElement:
        """Hash a history element from its stored branch path and attributes."""
        return HashedDomElement(
            HistoryTreeProcessor._parent_branch_path_hash(
                dom_history_element.entire_parent_branch_path
            ),
            HistoryTreeProcessor._attributes_hash(dom_history_element.attributes),
        )

    @staticmethod
    def _hash_dom_element(dom_element: DOMElementNode) -> HashedDomElement:
        """Hash a live DOM element from its computed branch path and attributes.

        The element's text is not hashed (see `_text_hash`, which is not
        called here).
        """
        branch_path = HistoryTreeProcessor._get_parent_branch_path(dom_element)
        return HashedDomElement(
            HistoryTreeProcessor._parent_branch_path_hash(branch_path),
            HistoryTreeProcessor._attributes_hash(dom_element.attributes),
        )

    @staticmethod
    def _get_parent_branch_path(dom_element: DOMElementNode) -> list[str]:
        """Return the tag names along the branch ending at `dom_element`.

        The list is ordered from the outermost node to the element itself.
        It includes the element's own tag name and excludes the node that has
        no parent (the root).
        """
        tag_names: list[str] = []
        node: DOMElementNode = dom_element
        # Walk upward; the parentless root terminates the walk and is skipped.
        while node.parent is not None:
            tag_names.append(node.tag_name)
            node = node.parent

        tag_names.reverse()
        return tag_names

    @staticmethod
    def _parent_branch_path_hash(parent_branch_path: list[str]) -> str:
        """Return the SHA-256 hex digest of the path joined with '/'."""
        joined = '/'.join(parent_branch_path)
        return hashlib.sha256(joined.encode()).hexdigest()

    @staticmethod
    def _attributes_hash(attributes: dict[str, str]) -> str:
        """Return the SHA-256 hex digest of the attributes.

        Pairs are rendered as `key=value` and concatenated without a
        separator, in the dictionary's iteration order.
        """
        rendered = [f'{name}={content}' for name, content in attributes.items()]
        return hashlib.sha256(''.join(rendered).encode()).hexdigest()

    @staticmethod
    def _text_hash(dom_element: DOMElementNode) -> str:
        """Return the SHA-256 hex digest of the element's text.

        The text is whatever `get_all_text_till_next_clickable_element()`
        returns for the element.
        """
        text = dom_element.get_all_text_till_next_clickable_element()
        return hashlib.sha256(text.encode()).hexdigest()
|
@@ -0,0 +1,33 @@
|
|
1
|
+
from dataclasses import dataclass
|
2
|
+
from typing import Optional
|
3
|
+
|
4
|
+
|
5
|
+
@dataclass
class HashedDomElement:
    """Pair of hashes that together serve as a DOM element's unique identifier.

    Instances compare equal when both hashes are equal (dataclass equality).
    """

    # Hash of the element's branch path.
    branch_path_hash: str
    # Hash of the element's attributes.
    attributes_hash: str
    # NOTE: a text hash is not part of the identifier (field is disabled).
|
14
|
+
|
15
|
+
|
16
|
+
@dataclass
class DOMHistoryElement:
    """Recorded description of a DOM element, kept in history."""

    tag_name: str
    xpath: str
    # None when the element had no highlight index.
    highlight_index: Optional[int]
    # Tag names along the branch leading to the element.
    entire_parent_branch_path: list[str]
    attributes: dict[str, str]
    shadow_root: bool = False

    def to_dict(self) -> dict:
        """Return the fields as a plain dict keyed by field name.

        Values are the stored objects themselves; nothing is copied.
        """
        field_names = (
            'tag_name',
            'xpath',
            'highlight_index',
            'entire_parent_branch_path',
            'attributes',
            'shadow_root',
        )
        return {name: getattr(self, name) for name in field_names}
|
lumivor/dom/service.py
ADDED
@@ -0,0 +1,100 @@
|
|
1
|
+
import logging
|
2
|
+
from importlib import resources
|
3
|
+
from typing import Optional
|
4
|
+
|
5
|
+
from playwright.async_api import Page
|
6
|
+
|
7
|
+
from lumivor.dom.views import (
|
8
|
+
DOMBaseNode,
|
9
|
+
DOMElementNode,
|
10
|
+
DOMState,
|
11
|
+
DOMTextNode,
|
12
|
+
SelectorMap,
|
13
|
+
)
|
14
|
+
|
15
|
+
logger = logging.getLogger(__name__)
|
16
|
+
|
17
|
+
|
18
|
+
class DomService:
    """Build a Python DOM tree for a Playwright page.

    The tree is produced by evaluating the packaged `buildDomTree.js` script
    in the page and converting its result into `DOMElementNode` /
    `DOMTextNode` objects.
    """

    def __init__(self, page: Page):
        """Store the Playwright `page` the DOM is read from."""
        self.page = page
        self.xpath_cache = {}

    # region - Clickable elements
    async def get_clickable_elements(self, highlight_elements: bool = True) -> DOMState:
        """Return the page's DOM tree together with its selector map.

        Args:
            highlight_elements: Whether the in-page script should draw
                highlight overlays on the elements it indexes.

        Raises:
            ValueError: If the script result cannot be parsed into an
                element node.
        """
        element_tree = await self._build_dom_tree(highlight_elements)
        selector_map = self._create_selector_map(element_tree)

        return DOMState(element_tree=element_tree, selector_map=selector_map)

    async def _build_dom_tree(self, highlight_elements: bool) -> DOMElementNode:
        """Evaluate `buildDomTree.js` in the page and parse its result.

        Raises:
            ValueError: If the parsed root is not a `DOMElementNode`.
        """
        js_code = resources.read_text('lumivor.dom', 'buildDomTree.js')

        # `page.evaluate` hands `arg` to the script function as its single
        # parameter. The flag must be passed as a bare boolean: wrapping it in
        # a list makes the script receive an array, which is always truthy in
        # JavaScript, so `highlight_elements=False` would be ignored.
        eval_page = await self.page.evaluate(
            js_code, highlight_elements
        )  # This is quite big, so be careful
        root = self._parse_node(eval_page)

        if not isinstance(root, DOMElementNode):
            raise ValueError('Failed to parse HTML to dictionary')

        return root

    def _create_selector_map(self, element_tree: DOMElementNode) -> SelectorMap:
        """Map each highlight index in the tree to its element node.

        Only element nodes are visited; nodes without a highlight index are
        traversed but not recorded.
        """
        selector_map = {}

        def process_node(node: DOMBaseNode):
            if not isinstance(node, DOMElementNode):
                return

            if node.highlight_index is not None:
                selector_map[node.highlight_index] = node

            for child in node.children:
                process_node(child)

        process_node(element_tree)
        return selector_map

    def _parse_node(
        self,
        node_data: dict,
        parent: Optional[DOMElementNode] = None,
    ) -> Optional[DOMBaseNode]:
        """Convert one script-produced node dict (and its subtree) into nodes.

        Args:
            node_data: Node description produced by the in-page script.
            parent: Element node the new node is attached to, if any.

        Returns:
            A `DOMTextNode` when `node_data['type']` is `'TEXT_NODE'`, a
            `DOMElementNode` otherwise, or None when `node_data` is empty.
        """
        if not node_data:
            return None

        if node_data.get('type') == 'TEXT_NODE':
            return DOMTextNode(
                text=node_data['text'],
                is_visible=node_data['isVisible'],
                parent=parent,
            )

        element_node = DOMElementNode(
            tag_name=node_data['tagName'],
            xpath=node_data['xpath'],
            attributes=node_data.get('attributes', {}),
            children=[],  # Filled below, once the node exists to act as parent
            is_visible=node_data.get('isVisible', False),
            is_interactive=node_data.get('isInteractive', False),
            is_top_element=node_data.get('isTopElement', False),
            highlight_index=node_data.get('highlightIndex'),
            shadow_root=node_data.get('shadowRoot', False),
            parent=parent,
        )

        # Missing (None) children and children that parse to None are skipped.
        children: list[DOMBaseNode] = []
        for child in node_data.get('children', []):
            if child is None:
                continue
            child_node = self._parse_node(child, parent=element_node)
            if child_node is not None:
                children.append(child_node)

        element_node.children = children

        return element_node

    # endregion
|