mx-cloud 0.0.13 → 0.0.14
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/build/interpret.d.ts +40 -0
- package/build/interpret.js +301 -7
- package/build/preprocessor.js +20 -7
- package/build/selector.d.ts +27 -0
- package/build/selector.js +485 -1
- package/build/types/workflow.d.ts +2 -0
- package/package.json +1 -1
package/build/interpret.d.ts
CHANGED
|
@@ -11,12 +11,14 @@ declare global {
|
|
|
11
11
|
selector: string;
|
|
12
12
|
tag: string;
|
|
13
13
|
attribute: string;
|
|
14
|
+
listFallbackSelector?: string;
|
|
14
15
|
}>) => Record<string, any>;
|
|
15
16
|
scrapeList: (config: {
|
|
16
17
|
listSelector: string;
|
|
17
18
|
fields: any;
|
|
18
19
|
limit?: number;
|
|
19
20
|
pagination: any;
|
|
21
|
+
listFallbackSelector?: string;
|
|
20
22
|
}) => Record<string, any>[];
|
|
21
23
|
scrapeListAuto: (listSelector: string) => {
|
|
22
24
|
selector: string;
|
|
@@ -95,6 +97,44 @@ export default class Interpreter extends EventEmitter {
|
|
|
95
97
|
private generatePageNodeInformation;
|
|
96
98
|
private detectElementChanges;
|
|
97
99
|
private validateWorkflowAction;
|
|
100
|
+
/**
|
|
101
|
+
* Test if a selector is working on the current page
|
|
102
|
+
* @param {Page} page - Playwright page object
|
|
103
|
+
* @param {string} selector - Selector to test
|
|
104
|
+
* @param {boolean} isListSelector - Whether this should find multiple elements
|
|
105
|
+
* @returns {Promise<boolean>} - Whether the selector works
|
|
106
|
+
*/
|
|
107
|
+
private testSelectorWorks;
|
|
108
|
+
/**
|
|
109
|
+
* Generate new selector from fallback selector
|
|
110
|
+
* @param {Page} page - Playwright page object
|
|
111
|
+
* @param {string} fallbackSelector - Fallback selector to use
|
|
112
|
+
* @param {boolean} isListSelector - Whether this is a list selector
|
|
113
|
+
* @param {string} listContext - List selector context for field selectors
|
|
114
|
+
* @returns {Promise<string|null>} - New selector or null if failed
|
|
115
|
+
*/
|
|
116
|
+
private generateSelectorFromFallback;
|
|
117
|
+
/**
|
|
118
|
+
* Validate and fix scrapeList action selectors
|
|
119
|
+
* @param {Object} scrapeListConfig - ScrapeList configuration object
|
|
120
|
+
* @param {Page} page - Playwright page object
|
|
121
|
+
* @returns {Promise<boolean>} - Whether any changes were made
|
|
122
|
+
*/
|
|
123
|
+
private validateScrapeListAction;
|
|
124
|
+
/**
|
|
125
|
+
* Validate and fix scrapeSchema action selectors
|
|
126
|
+
* @param {Object} scrapeSchemaConfig - ScrapeSchema configuration object
|
|
127
|
+
* @param {Page} page - Playwright page object
|
|
128
|
+
* @returns {Promise<boolean>} - Whether any changes were made
|
|
129
|
+
*/
|
|
130
|
+
private validateScrapeSchemaAction;
|
|
131
|
+
/**
|
|
132
|
+
* Validate and fix selectors for a workflow action just before execution
|
|
133
|
+
* @param {Page} page - Playwright page object
|
|
134
|
+
* @param {WhereWhatPair} action - The action to validate
|
|
135
|
+
* @returns {Promise<WhereWhatPair>} - The potentially modified action
|
|
136
|
+
*/
|
|
137
|
+
private validateAndFixSelectors;
|
|
98
138
|
private runLoop;
|
|
99
139
|
private ensureScriptsLoaded;
|
|
100
140
|
/**
|
package/build/interpret.js
CHANGED
|
@@ -559,7 +559,43 @@ class Interpreter extends events_1.EventEmitter {
|
|
|
559
559
|
}
|
|
560
560
|
return false;
|
|
561
561
|
};
|
|
562
|
-
//
|
|
562
|
+
// Helper function to detect if a selector is XPath
|
|
563
|
+
const isXPathSelector = (selector) => {
|
|
564
|
+
return selector.startsWith('//') ||
|
|
565
|
+
selector.startsWith('/') ||
|
|
566
|
+
selector.startsWith('./') ||
|
|
567
|
+
selector.includes('contains(@') ||
|
|
568
|
+
selector.includes('[count(') ||
|
|
569
|
+
selector.includes('@class=') ||
|
|
570
|
+
selector.includes('@id=') ||
|
|
571
|
+
selector.includes(' and ') ||
|
|
572
|
+
selector.includes(' or ');
|
|
573
|
+
};
|
|
574
|
+
// Helper function to wait for selector (CSS or XPath)
|
|
575
|
+
const waitForSelectorUniversal = (selector_2, ...args_1) => __awaiter(this, [selector_2, ...args_1], void 0, function* (selector, options = {}) {
|
|
576
|
+
try {
|
|
577
|
+
if (isXPathSelector(selector)) {
|
|
578
|
+
// Use XPath locator
|
|
579
|
+
const locator = page.locator(`xpath=${selector}`);
|
|
580
|
+
yield locator.waitFor({
|
|
581
|
+
state: 'attached',
|
|
582
|
+
timeout: options.timeout || 10000
|
|
583
|
+
});
|
|
584
|
+
return yield locator.elementHandle();
|
|
585
|
+
}
|
|
586
|
+
else {
|
|
587
|
+
// Use CSS selector
|
|
588
|
+
return yield page.waitForSelector(selector, {
|
|
589
|
+
state: 'attached',
|
|
590
|
+
timeout: options.timeout || 10000
|
|
591
|
+
});
|
|
592
|
+
}
|
|
593
|
+
}
|
|
594
|
+
catch (error) {
|
|
595
|
+
return null;
|
|
596
|
+
}
|
|
597
|
+
});
|
|
598
|
+
// Enhanced button finder with retry mechanism for both CSS and XPath selectors
|
|
563
599
|
const findWorkingButton = (selectors) => __awaiter(this, void 0, void 0, function* () {
|
|
564
600
|
let updatedSelectors = [...selectors];
|
|
565
601
|
for (let i = 0; i < selectors.length; i++) {
|
|
@@ -568,10 +604,7 @@ class Interpreter extends events_1.EventEmitter {
|
|
|
568
604
|
let selectorSuccess = false;
|
|
569
605
|
while (retryCount < MAX_RETRIES && !selectorSuccess) {
|
|
570
606
|
try {
|
|
571
|
-
const button = yield page.waitForSelector(selector, {
|
|
572
|
-
state: 'attached',
|
|
573
|
-
timeout: 10000
|
|
574
|
-
});
|
|
607
|
+
const button = yield waitForSelectorUniversal(selector, { timeout: 10000 });
|
|
575
608
|
if (button) {
|
|
576
609
|
debugLog('Found working selector:', selector);
|
|
577
610
|
return {
|
|
@@ -1563,6 +1596,266 @@ class Interpreter extends events_1.EventEmitter {
|
|
|
1563
1596
|
return modifiedAction;
|
|
1564
1597
|
});
|
|
1565
1598
|
}
|
|
1599
|
+
/**
|
|
1600
|
+
* Test if a selector is working on the current page
|
|
1601
|
+
* @param {Page} page - Playwright page object
|
|
1602
|
+
* @param {string} selector - Selector to test
|
|
1603
|
+
* @param {boolean} isListSelector - Whether this should find multiple elements
|
|
1604
|
+
* @returns {Promise<boolean>} - Whether the selector works
|
|
1605
|
+
*/
|
|
1606
|
+
testSelectorWorks(page_1, selector_2) {
|
|
1607
|
+
return __awaiter(this, arguments, void 0, function* (page, selector, isListSelector = false) {
|
|
1608
|
+
try {
|
|
1609
|
+
if (!selector || selector.trim() === '') {
|
|
1610
|
+
return false;
|
|
1611
|
+
}
|
|
1612
|
+
const isXPath = selector.startsWith('//') ||
|
|
1613
|
+
selector.startsWith('/') ||
|
|
1614
|
+
selector.includes('contains(@') ||
|
|
1615
|
+
selector.includes('@class=') ||
|
|
1616
|
+
selector.includes('@id=');
|
|
1617
|
+
let count = 0;
|
|
1618
|
+
if (isXPath) {
|
|
1619
|
+
const locator = page.locator(`xpath=${selector}`);
|
|
1620
|
+
count = yield locator.count();
|
|
1621
|
+
}
|
|
1622
|
+
else {
|
|
1623
|
+
const elements = yield page.$$(selector);
|
|
1624
|
+
count = elements ? elements.length : 0;
|
|
1625
|
+
}
|
|
1626
|
+
// For list selectors, we need multiple elements
|
|
1627
|
+
if (isListSelector) {
|
|
1628
|
+
return count >= 2;
|
|
1629
|
+
}
|
|
1630
|
+
// For field selectors, we need at least one element
|
|
1631
|
+
return count >= 1;
|
|
1632
|
+
}
|
|
1633
|
+
catch (error) {
|
|
1634
|
+
return false;
|
|
1635
|
+
}
|
|
1636
|
+
});
|
|
1637
|
+
}
|
|
1638
|
+
/**
|
|
1639
|
+
* Generate new selector from fallback selector
|
|
1640
|
+
* @param {Page} page - Playwright page object
|
|
1641
|
+
* @param {string} fallbackSelector - Fallback selector to use
|
|
1642
|
+
* @param {boolean} isListSelector - Whether this is a list selector
|
|
1643
|
+
* @param {string} listContext - List selector context for field selectors
|
|
1644
|
+
* @returns {Promise<string|null>} - New selector or null if failed
|
|
1645
|
+
*/
|
|
1646
|
+
generateSelectorFromFallback(page_1, fallbackSelector_1) {
|
|
1647
|
+
return __awaiter(this, arguments, void 0, function* (page, fallbackSelector, isListSelector = false, listContext = '', isPagination = false) {
|
|
1648
|
+
var _a, _b;
|
|
1649
|
+
try {
|
|
1650
|
+
// First check if fallback selector works
|
|
1651
|
+
const fallbackWorks = yield this.testSelectorWorks(page, fallbackSelector, isListSelector);
|
|
1652
|
+
if (!fallbackWorks) {
|
|
1653
|
+
return null;
|
|
1654
|
+
}
|
|
1655
|
+
// Get element using fallback selector
|
|
1656
|
+
const isXPath = fallbackSelector.startsWith('//') ||
|
|
1657
|
+
fallbackSelector.startsWith('/') ||
|
|
1658
|
+
fallbackSelector.includes('contains(@');
|
|
1659
|
+
let element;
|
|
1660
|
+
if (isXPath) {
|
|
1661
|
+
element = yield page.locator(`xpath=${fallbackSelector}`).first().elementHandle();
|
|
1662
|
+
}
|
|
1663
|
+
else {
|
|
1664
|
+
element = yield page.$(fallbackSelector);
|
|
1665
|
+
}
|
|
1666
|
+
if (!element) {
|
|
1667
|
+
return null;
|
|
1668
|
+
}
|
|
1669
|
+
// Generate new selectors
|
|
1670
|
+
let newSelectors;
|
|
1671
|
+
if (isListSelector) {
|
|
1672
|
+
return yield (0, selector_1.generateListSelectorFromFallback)(page, fallbackSelector);
|
|
1673
|
+
}
|
|
1674
|
+
else if (listContext) {
|
|
1675
|
+
return yield (0, selector_1.generateListFieldSelectorFromFallback)(page, fallbackSelector, listContext);
|
|
1676
|
+
}
|
|
1677
|
+
else {
|
|
1678
|
+
newSelectors = yield (0, selector_1.generateFieldSelectorFromFallback)(page, fallbackSelector);
|
|
1679
|
+
if (isPagination) {
|
|
1680
|
+
// For pagination, chain selectors in priority order
|
|
1681
|
+
let chainedSelectors = [
|
|
1682
|
+
(_a = newSelectors === null || newSelectors === void 0 ? void 0 : newSelectors.iframeSelector) === null || _a === void 0 ? void 0 : _a.full,
|
|
1683
|
+
(_b = newSelectors === null || newSelectors === void 0 ? void 0 : newSelectors.shadowSelector) === null || _b === void 0 ? void 0 : _b.full,
|
|
1684
|
+
newSelectors === null || newSelectors === void 0 ? void 0 : newSelectors.testIdSelector,
|
|
1685
|
+
newSelectors === null || newSelectors === void 0 ? void 0 : newSelectors.id,
|
|
1686
|
+
newSelectors === null || newSelectors === void 0 ? void 0 : newSelectors.hrefSelector,
|
|
1687
|
+
newSelectors === null || newSelectors === void 0 ? void 0 : newSelectors.accessibilitySelector,
|
|
1688
|
+
newSelectors === null || newSelectors === void 0 ? void 0 : newSelectors.attrSelector,
|
|
1689
|
+
]
|
|
1690
|
+
.filter(selector => selector !== null && selector !== undefined)
|
|
1691
|
+
.join(',');
|
|
1692
|
+
return chainedSelectors;
|
|
1693
|
+
}
|
|
1694
|
+
else {
|
|
1695
|
+
// For non-pagination, use getBestSelector
|
|
1696
|
+
const tagName = yield element.evaluate(el => el.tagName.toLowerCase());
|
|
1697
|
+
return yield (0, utils_1.getBestSelector)({
|
|
1698
|
+
selectors: newSelectors,
|
|
1699
|
+
tagName: tagName
|
|
1700
|
+
});
|
|
1701
|
+
}
|
|
1702
|
+
}
|
|
1703
|
+
}
|
|
1704
|
+
catch (error) {
|
|
1705
|
+
console.error(`Failed to generate selector from fallback: ${error.message}`);
|
|
1706
|
+
return null;
|
|
1707
|
+
}
|
|
1708
|
+
});
|
|
1709
|
+
}
|
|
1710
|
+
/**
|
|
1711
|
+
* Validate and fix scrapeList action selectors
|
|
1712
|
+
* @param {Object} scrapeListConfig - ScrapeList configuration object
|
|
1713
|
+
* @param {Page} page - Playwright page object
|
|
1714
|
+
* @returns {Promise<boolean>} - Whether any changes were made
|
|
1715
|
+
*/
|
|
1716
|
+
validateScrapeListAction(scrapeListConfig, page) {
|
|
1717
|
+
return __awaiter(this, void 0, void 0, function* () {
|
|
1718
|
+
let hasChanges = false;
|
|
1719
|
+
try {
|
|
1720
|
+
// Validate listSelector
|
|
1721
|
+
const listSelectorWorks = yield this.testSelectorWorks(page, scrapeListConfig.listSelector, true);
|
|
1722
|
+
if (!listSelectorWorks && scrapeListConfig.listFallbackSelector) {
|
|
1723
|
+
console.log(`ListSelector "${scrapeListConfig.listSelector}" not working, trying fallback...`);
|
|
1724
|
+
const newListSelector = yield this.generateSelectorFromFallback(page, scrapeListConfig.listFallbackSelector, true);
|
|
1725
|
+
if (newListSelector) {
|
|
1726
|
+
console.log(`Updated listSelector: ${scrapeListConfig.listSelector} -> ${newListSelector}`);
|
|
1727
|
+
scrapeListConfig.listSelector = newListSelector;
|
|
1728
|
+
hasChanges = true;
|
|
1729
|
+
}
|
|
1730
|
+
}
|
|
1731
|
+
// Validate field selectors
|
|
1732
|
+
if (scrapeListConfig.fields) {
|
|
1733
|
+
for (const [fieldName, fieldConfig] of Object.entries(scrapeListConfig.fields)) {
|
|
1734
|
+
const fieldSelectorWorks = yield this.testSelectorWorks(page, fieldConfig.selector, false);
|
|
1735
|
+
if (!fieldSelectorWorks && fieldConfig.fallbackSelector) {
|
|
1736
|
+
console.log(`Field selector "${fieldConfig.selector}" for ${fieldName} not working, trying fallback...`);
|
|
1737
|
+
const newFieldSelector = yield this.generateSelectorFromFallback(page, fieldConfig.fallbackSelector, false, scrapeListConfig.listSelector);
|
|
1738
|
+
if (newFieldSelector) {
|
|
1739
|
+
console.log(`Updated field selector for ${fieldName}: ${fieldConfig.selector} -> ${newFieldSelector}`);
|
|
1740
|
+
fieldConfig.selector = newFieldSelector;
|
|
1741
|
+
hasChanges = true;
|
|
1742
|
+
}
|
|
1743
|
+
}
|
|
1744
|
+
}
|
|
1745
|
+
}
|
|
1746
|
+
// Validate pagination selector if it exists and is not empty
|
|
1747
|
+
if (scrapeListConfig.pagination &&
|
|
1748
|
+
scrapeListConfig.pagination.selector &&
|
|
1749
|
+
scrapeListConfig.pagination.selector.trim() !== '') {
|
|
1750
|
+
// Handle comma-separated pagination selectors
|
|
1751
|
+
const paginationSelectors = scrapeListConfig.pagination.selector.split(',').map(s => s.trim());
|
|
1752
|
+
let workingSelector = null;
|
|
1753
|
+
for (const selector of paginationSelectors) {
|
|
1754
|
+
const works = yield this.testSelectorWorks(page, selector, false);
|
|
1755
|
+
if (works) {
|
|
1756
|
+
workingSelector = selector;
|
|
1757
|
+
break;
|
|
1758
|
+
}
|
|
1759
|
+
}
|
|
1760
|
+
if (!workingSelector && scrapeListConfig.pagination.fallbackSelector) {
|
|
1761
|
+
console.log(`Pagination selector not working, trying fallback...`);
|
|
1762
|
+
const newPaginationSelector = yield this.generateSelectorFromFallback(page, scrapeListConfig.pagination.fallbackSelector, false, '', true);
|
|
1763
|
+
if (newPaginationSelector) {
|
|
1764
|
+
console.log(`Updated pagination selector: ${scrapeListConfig.pagination.selector} -> ${newPaginationSelector}`);
|
|
1765
|
+
scrapeListConfig.pagination.selector = newPaginationSelector;
|
|
1766
|
+
hasChanges = true;
|
|
1767
|
+
}
|
|
1768
|
+
}
|
|
1769
|
+
else if (workingSelector && workingSelector !== scrapeListConfig.pagination.selector) {
|
|
1770
|
+
scrapeListConfig.pagination.selector = workingSelector;
|
|
1771
|
+
hasChanges = true;
|
|
1772
|
+
}
|
|
1773
|
+
}
|
|
1774
|
+
}
|
|
1775
|
+
catch (error) {
|
|
1776
|
+
console.error(`Error validating scrapeList action: ${error.message}`);
|
|
1777
|
+
}
|
|
1778
|
+
return hasChanges;
|
|
1779
|
+
});
|
|
1780
|
+
}
|
|
1781
|
+
/**
|
|
1782
|
+
* Validate and fix scrapeSchema action selectors
|
|
1783
|
+
* @param {Object} scrapeSchemaConfig - ScrapeSchema configuration object
|
|
1784
|
+
* @param {Page} page - Playwright page object
|
|
1785
|
+
* @returns {Promise<boolean>} - Whether any changes were made
|
|
1786
|
+
*/
|
|
1787
|
+
validateScrapeSchemaAction(scrapeSchemaConfig, page) {
|
|
1788
|
+
return __awaiter(this, void 0, void 0, function* () {
|
|
1789
|
+
let hasChanges = false;
|
|
1790
|
+
try {
|
|
1791
|
+
for (const [fieldName, fieldConfig] of Object.entries(scrapeSchemaConfig)) {
|
|
1792
|
+
if (fieldConfig.selector) {
|
|
1793
|
+
const selectorWorks = yield this.testSelectorWorks(page, fieldConfig.selector, false);
|
|
1794
|
+
if (!selectorWorks && fieldConfig.fallbackSelector) {
|
|
1795
|
+
console.log(`Schema field selector "${fieldConfig.selector}" for ${fieldName} not working, trying fallback...`);
|
|
1796
|
+
const newSelector = yield this.generateSelectorFromFallback(page, fieldConfig.fallbackSelector, false);
|
|
1797
|
+
if (newSelector) {
|
|
1798
|
+
console.log(`Updated schema field selector for ${fieldName}: ${fieldConfig.selector} -> ${newSelector}`);
|
|
1799
|
+
fieldConfig.selector = newSelector;
|
|
1800
|
+
hasChanges = true;
|
|
1801
|
+
}
|
|
1802
|
+
}
|
|
1803
|
+
}
|
|
1804
|
+
}
|
|
1805
|
+
}
|
|
1806
|
+
catch (error) {
|
|
1807
|
+
console.error(`Error validating scrapeSchema action: ${error.message}`);
|
|
1808
|
+
}
|
|
1809
|
+
return hasChanges;
|
|
1810
|
+
});
|
|
1811
|
+
}
|
|
1812
|
+
/**
|
|
1813
|
+
* Validate and fix selectors for a workflow action just before execution
|
|
1814
|
+
* @param {Page} page - Playwright page object
|
|
1815
|
+
* @param {WhereWhatPair} action - The action to validate
|
|
1816
|
+
* @returns {Promise<WhereWhatPair>} - The potentially modified action
|
|
1817
|
+
*/
|
|
1818
|
+
validateAndFixSelectors(page, action) {
|
|
1819
|
+
return __awaiter(this, void 0, void 0, function* () {
|
|
1820
|
+
const modifiedAction = JSON.parse(JSON.stringify(action));
|
|
1821
|
+
let totalChanges = 0;
|
|
1822
|
+
try {
|
|
1823
|
+
// Process each action in the 'what' array
|
|
1824
|
+
for (let i = 0; i < modifiedAction.what.length; i++) {
|
|
1825
|
+
const whatAction = modifiedAction.what[i];
|
|
1826
|
+
// Handle scrapeList actions
|
|
1827
|
+
if (whatAction.action === 'scrapeList' && whatAction.args && whatAction.args[0]) {
|
|
1828
|
+
console.log(`Validating scrapeList action...`);
|
|
1829
|
+
const hasChanges = yield this.validateScrapeListAction(whatAction.args[0], page);
|
|
1830
|
+
if (hasChanges) {
|
|
1831
|
+
totalChanges++;
|
|
1832
|
+
console.log(`Fixed scrapeList selectors`);
|
|
1833
|
+
}
|
|
1834
|
+
}
|
|
1835
|
+
// Handle scrapeSchema actions
|
|
1836
|
+
if (whatAction.action === 'scrapeSchema' && whatAction.args && whatAction.args[0]) {
|
|
1837
|
+
console.log(`Validating scrapeSchema action...`);
|
|
1838
|
+
const hasChanges = yield this.validateScrapeSchemaAction(whatAction.args[0], page);
|
|
1839
|
+
if (hasChanges) {
|
|
1840
|
+
totalChanges++;
|
|
1841
|
+
console.log(`Fixed scrapeSchema selectors`);
|
|
1842
|
+
}
|
|
1843
|
+
}
|
|
1844
|
+
}
|
|
1845
|
+
if (totalChanges > 0) {
|
|
1846
|
+
console.log(`Selector validation completed: ${totalChanges} actions modified`);
|
|
1847
|
+
}
|
|
1848
|
+
else {
|
|
1849
|
+
console.log(`Selector validation completed: No changes needed`);
|
|
1850
|
+
}
|
|
1851
|
+
}
|
|
1852
|
+
catch (error) {
|
|
1853
|
+
console.error(`Error in selector validation: ${error.message}`);
|
|
1854
|
+
this.trackAutohealFailure(`Selector validation failed: ${error.message}`);
|
|
1855
|
+
}
|
|
1856
|
+
return modifiedAction;
|
|
1857
|
+
});
|
|
1858
|
+
}
|
|
1566
1859
|
runLoop(p, workflow) {
|
|
1567
1860
|
return __awaiter(this, void 0, void 0, function* () {
|
|
1568
1861
|
var _a, _b;
|
|
@@ -1660,8 +1953,9 @@ class Interpreter extends events_1.EventEmitter {
|
|
|
1660
1953
|
}
|
|
1661
1954
|
lastAction = action;
|
|
1662
1955
|
try {
|
|
1663
|
-
|
|
1664
|
-
|
|
1956
|
+
const validatedAction = yield this.validateAndFixSelectors(p, action);
|
|
1957
|
+
console.log("Carrying out:", validatedAction.what);
|
|
1958
|
+
yield this.carryOutSteps(p, validatedAction.what);
|
|
1665
1959
|
usedActions.push((_b = action.id) !== null && _b !== void 0 ? _b : 'undefined');
|
|
1666
1960
|
workflowCopy.splice(actionId, 1);
|
|
1667
1961
|
console.log(`Action with ID ${action.id} removed from the workflow copy.`);
|
package/build/preprocessor.js
CHANGED
|
@@ -47,7 +47,7 @@ class Preprocessor {
|
|
|
47
47
|
*/
|
|
48
48
|
static getParams(workflow) {
|
|
49
49
|
const getParamsRecurse = (object) => {
|
|
50
|
-
if (typeof object === 'object') {
|
|
50
|
+
if (typeof object === 'object' && object !== null) {
|
|
51
51
|
// Recursion base case
|
|
52
52
|
if (object.$param) {
|
|
53
53
|
return [object.$param];
|
|
@@ -123,13 +123,26 @@ class Preprocessor {
|
|
|
123
123
|
const out = object;
|
|
124
124
|
// for every key (child) of the object
|
|
125
125
|
Object.keys(object).forEach((key) => {
|
|
126
|
-
|
|
127
|
-
if
|
|
128
|
-
|
|
129
|
-
|
|
126
|
+
const childValue = object[key];
|
|
127
|
+
// Skip if childValue is null, undefined, or not an object
|
|
128
|
+
if (!childValue || typeof childValue !== 'object') {
|
|
129
|
+
return; // Continue to next iteration
|
|
130
130
|
}
|
|
131
|
-
|
|
132
|
-
|
|
131
|
+
try {
|
|
132
|
+
const childKeys = Object.keys(childValue);
|
|
133
|
+
// if the field has only one key, which is `k`
|
|
134
|
+
if (childKeys.length === 1 && childValue[k]) {
|
|
135
|
+
// process the current special tag (init param, hydrate regex...)
|
|
136
|
+
out[key] = f(childValue[k]);
|
|
137
|
+
}
|
|
138
|
+
else {
|
|
139
|
+
// Recursively process the child object
|
|
140
|
+
initSpecialRecurse(childValue, k, f);
|
|
141
|
+
}
|
|
142
|
+
}
|
|
143
|
+
catch (error) {
|
|
144
|
+
// If Object.keys fails or any other error, just continue
|
|
145
|
+
console.warn(`Error processing key "${key}" in initSpecialRecurse:`, error);
|
|
133
146
|
}
|
|
134
147
|
});
|
|
135
148
|
return out;
|
package/build/selector.d.ts
CHANGED
|
@@ -29,4 +29,31 @@ interface SelectorResult {
|
|
|
29
29
|
* @returns {Promise<Selectors|null|undefined>}
|
|
30
30
|
*/
|
|
31
31
|
export declare const generateNonUniqueSelectors: (page: Page, elementHandle: ElementHandle, listSelector?: string) => Promise<SelectorResult>;
|
|
32
|
+
/**
|
|
33
|
+
* Generate new list selector from fallback element (based on your reference implementation)
|
|
34
|
+
* @param page - Playwright page object
|
|
35
|
+
* @param fallbackSelector - Fallback selector to use
|
|
36
|
+
* @returns New list selector or null if failed
|
|
37
|
+
*/
|
|
38
|
+
export declare const generateListSelectorFromFallback: (page: Page, fallbackSelector: string) => Promise<string | null>;
|
|
39
|
+
/**
|
|
40
|
+
* Generate new field selector from fallback selector (one field at a time)
|
|
41
|
+
* @param page - Playwright page object
|
|
42
|
+
* @param fallbackSelector - Fallback selector to use
|
|
43
|
+
* @param listSelector - The list selector context
|
|
44
|
+
* @returns New field selector or null if failed
|
|
45
|
+
*/
|
|
46
|
+
export declare const generateListFieldSelectorFromFallback: (page: Page, fallbackSelector: string, listSelector: string) => Promise<string | null>;
|
|
47
|
+
export declare const generateFieldSelectorFromFallback: (page: Page, fallbackSelector: string) => Promise<{
|
|
48
|
+
id: string | null;
|
|
49
|
+
generalSelector: string | null;
|
|
50
|
+
attrSelector: string | null;
|
|
51
|
+
testIdSelector: string | null;
|
|
52
|
+
text: string;
|
|
53
|
+
href?: string;
|
|
54
|
+
hrefSelector: string | null;
|
|
55
|
+
accessibilitySelector: string | null;
|
|
56
|
+
formSelector: string | null;
|
|
57
|
+
relSelector: string | null;
|
|
58
|
+
} | null>;
|
|
32
59
|
export {};
|
package/build/selector.js
CHANGED
|
@@ -9,7 +9,7 @@ var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, ge
|
|
|
9
9
|
});
|
|
10
10
|
};
|
|
11
11
|
Object.defineProperty(exports, "__esModule", { value: true });
|
|
12
|
-
exports.generateNonUniqueSelectors = exports.generateSelectors = void 0;
|
|
12
|
+
exports.generateFieldSelectorFromFallback = exports.generateListFieldSelectorFromFallback = exports.generateListSelectorFromFallback = exports.generateNonUniqueSelectors = exports.generateSelectors = void 0;
|
|
13
13
|
const generateSelectors = (page, elementHandle) => __awaiter(void 0, void 0, void 0, function* () {
|
|
14
14
|
try {
|
|
15
15
|
const selectors = yield elementHandle.evaluate((element) => {
|
|
@@ -848,3 +848,487 @@ const generateNonUniqueSelectors = (page_1, elementHandle_1, ...args_1) => __awa
|
|
|
848
848
|
}
|
|
849
849
|
});
|
|
850
850
|
exports.generateNonUniqueSelectors = generateNonUniqueSelectors;
|
|
851
|
+
/**
|
|
852
|
+
* Generate new list selector from fallback element (based on your reference implementation)
|
|
853
|
+
* @param page - Playwright page object
|
|
854
|
+
* @param fallbackSelector - Fallback selector to use
|
|
855
|
+
* @returns New list selector or null if failed
|
|
856
|
+
*/
|
|
857
|
+
const generateListSelectorFromFallback = (page, fallbackSelector) => __awaiter(void 0, void 0, void 0, function* () {
|
|
858
|
+
try {
|
|
859
|
+
// Execute selector generation within the page context
|
|
860
|
+
const newSelector = yield page.evaluate((selector) => {
|
|
861
|
+
try {
|
|
862
|
+
// Check if selector is XPath
|
|
863
|
+
const isXPath = selector.startsWith('//') ||
|
|
864
|
+
selector.startsWith('/') ||
|
|
865
|
+
selector.includes('contains(@');
|
|
866
|
+
let elements;
|
|
867
|
+
if (isXPath) {
|
|
868
|
+
// Use XPath evaluation
|
|
869
|
+
const xpathResult = document.evaluate(selector, document, null, XPathResult.ORDERED_NODE_ITERATOR_TYPE, null);
|
|
870
|
+
elements = [];
|
|
871
|
+
let node = xpathResult.iterateNext();
|
|
872
|
+
while (node && elements.length < 5) { // Limit to 5 elements for performance
|
|
873
|
+
if (node.nodeType === Node.ELEMENT_NODE) {
|
|
874
|
+
elements.push(node);
|
|
875
|
+
}
|
|
876
|
+
node = xpathResult.iterateNext();
|
|
877
|
+
}
|
|
878
|
+
}
|
|
879
|
+
else {
|
|
880
|
+
// Use CSS selector
|
|
881
|
+
const nodeList = document.querySelectorAll(selector);
|
|
882
|
+
elements = Array.from(nodeList).slice(0, 5); // Limit to 5 elements
|
|
883
|
+
}
|
|
884
|
+
if (!elements || elements.length === 0) {
|
|
885
|
+
return null;
|
|
886
|
+
}
|
|
887
|
+
// Extract element data for analysis
|
|
888
|
+
const elementData = elements.map((el) => ({
|
|
889
|
+
tagName: el.tagName.toLowerCase(),
|
|
890
|
+
className: el.getAttribute('class') || '',
|
|
891
|
+
attributes: Array.from(el.attributes).reduce((attrs, attr) => {
|
|
892
|
+
if (!['id', 'style', 'data-mx-id'].includes(attr.name)) {
|
|
893
|
+
attrs[attr.name] = attr.value;
|
|
894
|
+
}
|
|
895
|
+
return attrs;
|
|
896
|
+
}, {}),
|
|
897
|
+
childrenCount: el.children.length
|
|
898
|
+
}));
|
|
899
|
+
if (elementData.length === 0) {
|
|
900
|
+
return null;
|
|
901
|
+
}
|
|
902
|
+
const firstElement = elementData[0];
|
|
903
|
+
const tagName = firstElement.tagName;
|
|
904
|
+
// Check if all elements have the same tag name
|
|
905
|
+
const allSameTag = elementData.every((el) => el.tagName === tagName);
|
|
906
|
+
if (!allSameTag) {
|
|
907
|
+
console.warn("Inconsistent tag names in group, using first element's tag");
|
|
908
|
+
}
|
|
909
|
+
// Start building XPath - ALWAYS generate primary XPath
|
|
910
|
+
let xpath = `//${tagName}`;
|
|
911
|
+
const predicates = [];
|
|
912
|
+
// Get common classes
|
|
913
|
+
const allClasses = elementData.map((el) => el.className.split(/\s+/).filter(Boolean));
|
|
914
|
+
if (allClasses.length > 0 && allClasses[0].length > 0) {
|
|
915
|
+
// Find classes that appear in most elements (at least 60%)
|
|
916
|
+
const classFrequency = new Map();
|
|
917
|
+
allClasses.forEach((classes) => {
|
|
918
|
+
classes.forEach((cls) => {
|
|
919
|
+
classFrequency.set(cls, (classFrequency.get(cls) || 0) + 1);
|
|
920
|
+
});
|
|
921
|
+
});
|
|
922
|
+
const minFrequency = Math.ceil(allClasses.length * 0.6);
|
|
923
|
+
const commonClasses = Array.from(classFrequency.entries())
|
|
924
|
+
.filter(([_, count]) => count >= minFrequency)
|
|
925
|
+
.map(([cls, _]) => cls);
|
|
926
|
+
if (commonClasses.length > 0) {
|
|
927
|
+
predicates.push(...commonClasses.map((cls) => `contains(@class, '${cls}')`));
|
|
928
|
+
}
|
|
929
|
+
}
|
|
930
|
+
// Get common attributes (excluding id, style, data-mx-id)
|
|
931
|
+
if (elementData.length > 1) {
|
|
932
|
+
const commonAttributes = {};
|
|
933
|
+
const firstAttrs = firstElement.attributes;
|
|
934
|
+
for (const [attr, value] of Object.entries(firstAttrs)) {
|
|
935
|
+
const isCommon = elementData.every((el) => el.attributes[attr] === value);
|
|
936
|
+
if (isCommon) {
|
|
937
|
+
commonAttributes[attr] = value;
|
|
938
|
+
}
|
|
939
|
+
}
|
|
940
|
+
for (const [attr, value] of Object.entries(commonAttributes)) {
|
|
941
|
+
predicates.push(`@${attr}='${value}'`);
|
|
942
|
+
}
|
|
943
|
+
}
|
|
944
|
+
// Optional: Common child count (only if consistent across most elements)
|
|
945
|
+
const childCountFrequency = new Map();
|
|
946
|
+
elementData.forEach((el) => {
|
|
947
|
+
childCountFrequency.set(el.childrenCount, (childCountFrequency.get(el.childrenCount) || 0) + 1);
|
|
948
|
+
});
|
|
949
|
+
const mostCommonChildCount = Array.from(childCountFrequency.entries())
|
|
950
|
+
.sort((a, b) => b[1] - a[1])[0];
|
|
951
|
+
if (mostCommonChildCount && mostCommonChildCount[1] >= Math.ceil(elementData.length * 0.8)) {
|
|
952
|
+
predicates.push(`count(*)=${mostCommonChildCount[0]}`);
|
|
953
|
+
}
|
|
954
|
+
// Build final XPath
|
|
955
|
+
if (predicates.length > 0) {
|
|
956
|
+
xpath += `[${predicates.join(' and ')}]`;
|
|
957
|
+
}
|
|
958
|
+
console.log(`Generated list selector: ${xpath} from fallback: ${selector}`);
|
|
959
|
+
return xpath;
|
|
960
|
+
}
|
|
961
|
+
catch (error) {
|
|
962
|
+
const errorMessage = error instanceof Error ? error.message : 'Unknown error';
|
|
963
|
+
console.error(`Failed to generate list selector from fallback: ${errorMessage}`);
|
|
964
|
+
return null;
|
|
965
|
+
}
|
|
966
|
+
}, fallbackSelector);
|
|
967
|
+
return newSelector;
|
|
968
|
+
}
|
|
969
|
+
catch (error) {
|
|
970
|
+
const errorMessage = error instanceof Error ? error.message : 'Unknown error';
|
|
971
|
+
console.error(`Failed to execute selector generation: ${errorMessage}`);
|
|
972
|
+
return null;
|
|
973
|
+
}
|
|
974
|
+
});
|
|
975
|
+
exports.generateListSelectorFromFallback = generateListSelectorFromFallback;
|
|
976
|
+
/**
 * Generate a new field selector from a fallback selector (one field at a time).
 *
 * The fallback selector (XPath or CSS) is resolved to its first matching
 * element. The list selector is always evaluated as XPath; the first list
 * item that contains the target element is used as the root. The result is
 * the list selector with a relative structural XPath (root -> target) appended.
 *
 * @param page - Playwright page object (only `page.evaluate` is used)
 * @param fallbackSelector - Fallback selector to use (XPath or CSS)
 * @param listSelector - The list selector context (evaluated as XPath)
 * @returns New field selector, or null if the target element, a containing
 *          list item, or a structural path could not be determined
 */
const generateListFieldSelectorFromFallback = (page, fallbackSelector, listSelector) => __awaiter(void 0, void 0, void 0, function* () {
    try {
        // The callback is executed inside the page, so every helper it needs
        // has to be declared within the callback itself.
        const newSelector = yield page.evaluate(({ fallbackSel, listSel }) => {
            // Heuristic: treat the selector as XPath when it looks like a path
            // or contains typical XPath attribute predicates.
            const isXPathSelector = (selector) => {
                return selector.startsWith('//') ||
                    selector.startsWith('/') ||
                    selector.includes('contains(@') ||
                    selector.includes('@class=') ||
                    selector.includes('@id=');
            };
            // Evaluate an XPath against the document and return the matching
            // element nodes; an invalid expression yields an empty list.
            const evaluateXPath = (xpath) => {
                try {
                    const result = document.evaluate(xpath, document, null, XPathResult.ORDERED_NODE_SNAPSHOT_TYPE, null);
                    const elements = [];
                    for (let i = 0; i < result.snapshotLength; i++) {
                        const node = result.snapshotItem(i);
                        if (node && node.nodeType === Node.ELEMENT_NODE) {
                            elements.push(node);
                        }
                    }
                    return elements;
                }
                catch (error) {
                    return [];
                }
            };
            // Quote a value as an XPath 1.0 string literal. XPath 1.0 has no
            // escape character, so a value holding both quote kinds has to be
            // assembled with concat().
            const toXPathLiteral = (value) => {
                if (!value.includes("'")) {
                    return `'${value}'`;
                }
                if (!value.includes('"')) {
                    return `"${value}"`;
                }
                const pieces = value.split("'").map((piece) => `'${piece}'`);
                return `concat(${pieces.join(`, "'", `)})`;
            };
            // 1-based position of the element among its same-tag siblings,
            // which is what an XPath `tag[n]` predicate counts.
            const getSiblingPosition = (element, parent) => {
                const siblings = Array.from(parent.children || [])
                    .filter((child) => child.tagName === element.tagName);
                return siblings.indexOf(element) + 1;
            };
            // Build one XPath step for the element, preferring (in order):
            // classes, a meaningful attribute, data-testid, id, any other
            // data-* attribute, and finally the sibling position.
            const generateOptimizedStructuralStep = (element) => {
                const tagName = element.tagName.toLowerCase();
                const parent = element.parentElement;
                if (!parent) {
                    return tagName;
                }
                // Use classes first
                const classes = Array.from(element.classList);
                if (classes.length > 0) {
                    const classSelector = classes
                        .map((cls) => `contains(@class, ${toXPathLiteral(cls)})`)
                        .join(" and ");
                    return `${tagName}[${classSelector}]`;
                }
                // Try meaningful attributes
                const meaningfulAttrs = ["role", "type", "name", "src", "aria-label"];
                for (const attrName of meaningfulAttrs) {
                    if (element.hasAttribute(attrName)) {
                        const value = element.getAttribute(attrName);
                        return `${tagName}[@${attrName}=${toXPathLiteral(value)}]`;
                    }
                }
                // Try test ID
                const testId = element.getAttribute("data-testid");
                if (testId) {
                    return `${tagName}[@data-testid=${toXPathLiteral(testId)}]`;
                }
                // Try ID (ids starting with a digit are skipped)
                if (element.id && !element.id.match(/^\d/)) {
                    return `${tagName}[@id=${toXPathLiteral(element.id)}]`;
                }
                // Try other data attributes
                for (const attr of Array.from(element.attributes)) {
                    if (attr.name.startsWith("data-") &&
                        attr.name !== "data-testid" &&
                        attr.name !== "data-mx-id" &&
                        attr.value) {
                        return `${tagName}[@${attr.name}=${toXPathLiteral(attr.value)}]`;
                    }
                }
                // Fallback to position
                const position = getSiblingPosition(element, parent);
                return `${tagName}[${position}]`;
            };
            // Relative path ("/step/step/...") from rootElement down to
            // targetElement, or null when the target is not a strict
            // descendant of the root.
            const getOptimizedStructuralPath = (targetElement, rootElement) => {
                if (!rootElement.contains(targetElement) || targetElement === rootElement) {
                    return null;
                }
                const pathParts = [];
                let current = targetElement;
                // Build path from target up to root
                while (current && current !== rootElement) {
                    const pathPart = generateOptimizedStructuralStep(current);
                    if (pathPart) {
                        pathParts.unshift(pathPart);
                    }
                    current = current.parentElement;
                }
                return pathParts.length > 0 ? "/" + pathParts.join("/") : null;
            };
            try {
                // Get the first element from fallback selector
                let targetElement = null;
                if (isXPathSelector(fallbackSel)) {
                    const elements = evaluateXPath(fallbackSel);
                    targetElement = elements[0] || null;
                }
                else {
                    targetElement = document.querySelector(fallbackSel);
                }
                if (!targetElement) {
                    return null;
                }
                // Find the first list item that contains the target element
                const parentElements = evaluateXPath(listSel);
                let containingParent = null;
                for (const parent of parentElements) {
                    if (parent.contains(targetElement)) {
                        containingParent = parent;
                        break;
                    }
                }
                if (!containingParent) {
                    return null;
                }
                // Build structural path
                const structuralPath = getOptimizedStructuralPath(targetElement, containingParent);
                if (!structuralPath) {
                    return null;
                }
                // Combine list selector with structural path
                const newSelector = listSel + structuralPath;
                console.log(`Generated field selector: ${newSelector} from fallback: ${fallbackSel}`);
                return newSelector;
            }
            catch (error) {
                console.error("Error generating field selector:", error);
                return null;
            }
        }, { fallbackSel: fallbackSelector, listSel: listSelector });
        return newSelector;
    }
    catch (error) {
        const errorMessage = error instanceof Error ? error.message : 'Unknown error';
        console.error(`Failed to generate field selector: ${errorMessage}`);
        return null;
    }
});
exports.generateListFieldSelectorFromFallback = generateListFieldSelectorFromFallback;
|
|
1131
|
+
/**
 * Generate all schema field selectors from a fallback selector (CSS to CSS).
 *
 * The fallback selector is resolved with `document.querySelector`. When the
 * matched element's parent is an `<a>`, the link is used instead. A set of
 * candidate CSS selectors is then derived from that element.
 *
 * @param page - Playwright page object (only `page.evaluate` is used)
 * @param fallbackSelector - CSS fallback selector to use
 * @returns Object holding the generated selectors (`id`, `generalSelector`,
 *          `attrSelector`, `testIdSelector`, `hrefSelector`,
 *          `accessibilitySelector`, `formSelector`, `relSelector`) plus the
 *          element's `text` and `href`; null if the element is not found or
 *          generation fails
 */
const generateFieldSelectorFromFallback = (page, fallbackSelector) => __awaiter(void 0, void 0, void 0, function* () {
    try {
        // The callback is executed inside the page, so every helper it needs
        // has to be declared within the callback itself.
        const selectors = yield page.evaluate((fallbackSel) => {
            // Simplified CSS escaper. In identifier mode punctuation is
            // backslash-escaped; in string mode (the default) the result is
            // safe to place between double quotes.
            function cssesc(string, options = {}) {
                const { isIdentifier = false } = options;
                let output = '';
                // Iterate by code point: escaping the two halves of a surrogate
                // pair separately would decode to U+FFFD instead of the character.
                for (const char of string) {
                    const code = char.codePointAt(0);
                    if (code < 0x20 || code > 0x7e) {
                        // Control and non-ASCII characters become hex escapes
                        output += '\\' + code.toString(16).toUpperCase() + ' ';
                    }
                    else if (char === '\\' ||
                        (!isIdentifier && char === '"') ||
                        (isIdentifier && /[ -,\.\/:-@\[\]\^`\{-~]/.test(char))) {
                        // A double quote must be escaped in string mode because
                        // callers wrap the value in "..."
                        output += '\\' + char;
                    }
                    else {
                        output += char;
                    }
                }
                // An identifier may not start with a digit; encode it as \3N
                if (isIdentifier && /\d/.test(string.charAt(0))) {
                    output = '\\3' + string.charAt(0) + ' ' + output.slice(1);
                }
                return output;
            }
            // Simplified finder: returns a single-step CSS selector for the
            // element. With `options.attr`, the first attribute accepted by
            // that predicate (other than data-mx-id) wins; otherwise id,
            // classes, a meaningful attribute, and finally tag/position.
            function finder(input, options = {}) {
                if (input.nodeType !== Node.ELEMENT_NODE) {
                    throw new Error("Can't generate CSS selector for non-element node type.");
                }
                if (input.tagName.toLowerCase() === 'html') {
                    return 'html';
                }
                // If attr function is provided, use it to filter attributes
                if (options.attr) {
                    const attrs = Array.from(input.attributes).filter(attr => options.attr(attr.name) && attr.name !== 'data-mx-id');
                    if (attrs.length > 0) {
                        const attr = attrs[0];
                        return `[${cssesc(attr.name, { isIdentifier: true })}="${cssesc(attr.value)}"]`;
                    }
                }
                // Try ID first (ids starting with a digit are skipped)
                const elementId = input.getAttribute('id');
                if (elementId && !elementId.match(/^\d/)) {
                    return '#' + cssesc(elementId, { isIdentifier: true });
                }
                const tagName = input.tagName.toLowerCase();
                // Try classes
                const classes = Array.from(input.classList);
                if (classes.length > 0) {
                    const classSelector = classes.map(cls => '.' + cssesc(cls, { isIdentifier: true })).join('');
                    return tagName + classSelector;
                }
                // Try attributes
                const meaningfulAttrs = ['data-testid', 'data-test-id', 'data-testing', 'data-test', 'data-qa', 'data-cy', 'name', 'aria-label', 'alt', 'title', 'href', 'role', 'type'];
                for (const attrName of meaningfulAttrs) {
                    if (input.hasAttribute(attrName)) {
                        const value = input.getAttribute(attrName);
                        if (value) {
                            return `[${cssesc(attrName, { isIdentifier: true })}="${cssesc(value)}"]`;
                        }
                    }
                }
                // Fallback to tag name with a positional pseudo-class if needed.
                // The index is counted among same-tag siblings only, which is
                // what :nth-of-type (not :nth-child) selects by.
                const parent = input.parentElement;
                if (parent) {
                    const siblings = Array.from(parent.children).filter(child => child.tagName === input.tagName);
                    if (siblings.length > 1) {
                        const index = siblings.indexOf(input) + 1;
                        return `${tagName}:nth-of-type(${index})`;
                    }
                }
                return tagName;
            }
            // Attribute selector for the first listed attribute that has a
            // non-empty value on the element, or null when there is none.
            function genSelectorForAttributes(element, attributes) {
                try {
                    for (const attr of attributes) {
                        if (element.hasAttribute(attr)) {
                            const value = element.getAttribute(attr);
                            if (value && value.length > 0) {
                                return `[${cssesc(attr, { isIdentifier: true })}="${cssesc(value)}"]`;
                            }
                        }
                    }
                    return null;
                }
                catch (e) {
                    return null;
                }
            }
            // Check if character is number
            function isCharacterNumber(char) {
                return char && char.length === 1 && /[0-9]/.test(char);
            }
            // Subset of `attributes` that carry a non-empty value on the element
            function genAttributeSet(element, attributes) {
                return new Set(attributes.filter((attr) => {
                    const attrValue = element.getAttribute(attr);
                    return attrValue != null && attrValue.length > 0;
                }));
            }
            // Check if attributes are defined
            function isAttributesDefined(element, attributes) {
                return genAttributeSet(element, attributes).size > 0;
            }
            // Collect every candidate selector for the element. A failure in
            // one finder call is logged and leaves that entry as null.
            function genSelectors(element) {
                const href = element.getAttribute('href');
                let generalSelector = null;
                try {
                    generalSelector = finder(element);
                }
                catch (e) {
                    console.warn('Error generating general selector:', e);
                }
                let attrSelector = null;
                try {
                    attrSelector = finder(element, { attr: () => true });
                }
                catch (e) {
                    console.warn('Error generating attr selector:', e);
                }
                const relSelector = genSelectorForAttributes(element, ['rel']);
                const hrefSelector = genSelectorForAttributes(element, ['href']);
                const formSelector = genSelectorForAttributes(element, ['name', 'placeholder', 'for']);
                const accessibilitySelector = genSelectorForAttributes(element, ['aria-label', 'alt', 'title']);
                const testIdSelector = genSelectorForAttributes(element, [
                    'data-testid', 'data-test-id', 'data-testing', 'data-test', 'data-qa', 'data-cy'
                ]);
                // We won't use an id selector if the id is invalid (starts with a number)
                let idSelector = null;
                try {
                    const firstIdChar = element.id === null || element.id === void 0 ? void 0 : element.id[0];
                    idSelector = isAttributesDefined(element, ['id']) &&
                        !isCharacterNumber(firstIdChar)
                        ? finder(element, {
                            attr: (name) => name === 'id',
                        })
                        : null;
                }
                catch (e) {
                    console.warn('Error generating id selector:', e);
                }
                return {
                    id: idSelector,
                    generalSelector,
                    attrSelector,
                    testIdSelector,
                    text: element.innerText,
                    href: href !== null && href !== void 0 ? href : undefined,
                    hrefSelector,
                    accessibilitySelector,
                    formSelector,
                    relSelector,
                };
            }
            try {
                // Get the target element from CSS fallback selector
                const targetElement = document.querySelector(fallbackSel);
                if (!targetElement) {
                    console.warn('Target element not found with CSS fallback selector:', fallbackSel);
                    return null;
                }
                // Prioritize links: when the direct parent is an <a>, describe
                // the link rather than its child.
                const { parentElement } = targetElement;
                const element = (parentElement === null || parentElement === void 0 ? void 0 : parentElement.tagName) === 'A' ? parentElement : targetElement;
                const generatedSelectors = genSelectors(element);
                console.log('Generated schema field CSS selectors:', generatedSelectors);
                return generatedSelectors;
            }
            catch (error) {
                console.error('Error in schema field CSS selector generation:', error);
                return null;
            }
        }, fallbackSelector);
        return selectors;
    }
    catch (error) {
        const errorMessage = error instanceof Error ? error.message : 'Unknown error';
        console.error(`Failed to generate schema field CSS selectors: ${errorMessage}`);
        return null;
    }
});
exports.generateFieldSelectorFromFallback = generateFieldSelectorFromFallback;
|
|
@@ -83,6 +83,7 @@ export interface SchemaConfig {
|
|
|
83
83
|
nodeInfo: SerializedNode;
|
|
84
84
|
selector: string;
|
|
85
85
|
attribute: string;
|
|
86
|
+
fallbackSelector?: string;
|
|
86
87
|
coordinates: Coordinates;
|
|
87
88
|
elementMetadata: {
|
|
88
89
|
classList: string[];
|
|
@@ -93,6 +94,7 @@ export interface SchemaConfig {
|
|
|
93
94
|
}
|
|
94
95
|
export interface ScrapeListSchema {
|
|
95
96
|
listSelector: string;
|
|
97
|
+
listFallbackSelector?: string;
|
|
96
98
|
listSelectorInfo: {
|
|
97
99
|
nodeInfo: SerializedNode;
|
|
98
100
|
coordinates: Coordinates;
|